From 90dc7ee7564493109059753d34d070e4d7236b43 Mon Sep 17 00:00:00 2001 From: AidanNell Date: Tue, 9 Jul 2024 16:50:15 +0700 Subject: [PATCH 01/20] Validation suite modules --- .../binary_classification_full_suite.py | 26 +++ .../data_privacy_full_suite.py | 161 ++++++++++++++++++ .../data_quality_full_suite.py | 127 ++++++++++++++ .../validation_suites/master_test_suites.py | 23 +++ .../regression_full_suite.py | 26 +++ .../time_series_full_suite.py | 24 +++ 6 files changed, 387 insertions(+) create mode 100644 24.2/samples/validation_suites/binary_classification_full_suite.py create mode 100644 24.2/samples/validation_suites/data_privacy_full_suite.py create mode 100644 24.2/samples/validation_suites/data_quality_full_suite.py create mode 100644 24.2/samples/validation_suites/master_test_suites.py create mode 100644 24.2/samples/validation_suites/regression_full_suite.py create mode 100644 24.2/samples/validation_suites/time_series_full_suite.py diff --git a/24.2/samples/validation_suites/binary_classification_full_suite.py b/24.2/samples/validation_suites/binary_classification_full_suite.py new file mode 100644 index 0000000..8427247 --- /dev/null +++ b/24.2/samples/validation_suites/binary_classification_full_suite.py @@ -0,0 +1,26 @@ +# import the Vectice provided binary classification tests +from vectice.models.test_library.binary_classification_test import ( + plot_roc_curve, + conf_matrix, + explainability, + feature_importance, + label_drift, + prediction_drift, +) + +# Map the tests to be used +BINARY_CLASSIFICATION_FULL_SUITE_MAP_TEST = { + "roc": plot_roc_curve, + "cm": conf_matrix, + "explainability": explainability, + "feature_importance": feature_importance, + "drift": [label_drift, prediction_drift], + "binary_full_suite": [ + plot_roc_curve, + conf_matrix, + explainability, + feature_importance, + label_drift, + prediction_drift, + ], +} diff --git a/24.2/samples/validation_suites/data_privacy_full_suite.py b/24.2/samples/validation_suites/data_privacy_full_suite.py new file mode 100644 index 0000000..4fd2215 --- /dev/null +++ b/24.2/samples/validation_suites/data_privacy_full_suite.py @@ -0,0 +1,161 @@ +# Write custom tests which can be used to validate your datasets security +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pandas as pd + +if TYPE_CHECKING: + from numpy.typing import ArrayLike + from pandas import DataFrame + + from vectice.models.validation_dataset import TestSuiteReturnType + + +def sensitive_data_check( + dataset: DataFrame | None = None, + training_df: DataFrame | None = None, + testing_df: DataFrame | None = None, + feature_columns: ArrayLike | list | None = None, + target_column: ArrayLike | str | None = None, + sensitive_keywords: list | None = None, +) -> TestSuiteReturnType | None: + from vectice import Table + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None or sensitive_keywords is None: + return None + + # Initialize a dictionary to hold counts of sensitive data + sensitive_counts = {keyword: 0 for keyword in sensitive_keywords} + + # Check each cell in the DataFrame for sensitive keywords + for keyword in sensitive_keywords: + sensitive_counts[keyword] = dataset.apply( + lambda x: x.astype(str).str.contains(keyword, case=False).sum() + ).sum() + + # Create a DataFrame with the results + sensitive_counts_df = pd.DataFrame( + { + "Sensitive Keyword": list(sensitive_counts.keys()), + "Count": list(sensitive_counts.values()), + } + ) + + table = Table(sensitive_counts_df) + + 
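+    # Return the keyword counts as a Vectice table in the validation results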
return TestSuiteReturnType( + properties={}, + tables=[table], + attachments=[], + ) + + +def pii_check( + dataset: DataFrame | None = None, + training_df: DataFrame | None = None, + testing_df: DataFrame | None = None, + feature_columns: ArrayLike | list | None = None, + target_column: ArrayLike | str | None = None, +) -> TestSuiteReturnType | None: + from vectice import Table + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None: + return None + + # Define common PII patterns + pii_patterns = { + "name": r"\b[A-Z][a-z]*\b", + "email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b", + "phone": r"\b(\+?[\d]{1,3}[-.\s]?[\d]{1,4}[-.\s]?[\d]{1,4}[-.\s]?[\d]{1,9})\b", + } + + # Initialize a dictionary to hold counts of PII matches + pii_counts = {key: 0 for key in pii_patterns.keys()} + + # Check each column in the DataFrame for PII patterns + for column in dataset.columns: + for key, pattern in pii_patterns.items(): + pii_counts[key] += ( + dataset[column] + .astype(str) + .str.contains(pattern, case=False, regex=True) + .sum() + ) + + # Create a DataFrame with the results + pii_counts_df = pd.DataFrame( + {"PII Type": list(pii_counts.keys()), "Count": list(pii_counts.values())} + ) + + table = Table(pii_counts_df) + + return TestSuiteReturnType( + properties={}, + tables=[table], + attachments=[], + ) + + +def sensitive_data_type_check( + dataset: DataFrame | None = None, + training_df: DataFrame | None = None, + testing_df: DataFrame | None = None, + feature_columns: ArrayLike | list | None = None, + target_column: ArrayLike | str | None = None, +) -> TestSuiteReturnType | None: + from vectice import Table + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None: + return None + + # Define patterns for sensitive data types + sensitive_data_patterns = { + "credit_card": r"\b(?:\d[ -]*?){13,16}\b", + "ssn": r"\b\d{3}-\d{2}-\d{4}\b", + } + + # Initialize a dictionary to hold counts of sensitive data type matches + sensitive_data_counts = {key: 0 for key in sensitive_data_patterns.keys()} + + # Check each column in the DataFrame for sensitive data type patterns + for column in dataset.columns: + for key, pattern in sensitive_data_patterns.items(): + sensitive_data_counts[key] += ( + dataset[column] + .astype(str) + .str.contains(pattern, case=False, regex=True) + .sum() + ) + + # Create a DataFrame with the results + sensitive_data_counts_df = pd.DataFrame( + { + "Sensitive Data Type": list(sensitive_data_counts.keys()), + "Count": list(sensitive_data_counts.values()), + } + ) + + table = Table(sensitive_data_counts_df) + + return TestSuiteReturnType( + properties={}, + tables=[table], + attachments=[], + ) + + +# Map the tests to be used +DATA_PRIVACY_SUITE_MAP_TEST = { + "sensitive_data_check": sensitive_data_check, + "pii_check": pii_check, + "sensitive_data_type_check": sensitive_data_type_check, + "data_privacy_full_suite": [ + sensitive_data_check, + pii_check, + sensitive_data_type_check, + ], +} diff --git a/24.2/samples/validation_suites/data_quality_full_suite.py b/24.2/samples/validation_suites/data_quality_full_suite.py new file mode 100644 index 0000000..b946b94 --- /dev/null +++ b/24.2/samples/validation_suites/data_quality_full_suite.py @@ -0,0 +1,127 @@ +# Write custom tests which can be used to validate your datasets quality +from __future__ import annotations + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from pandas import DataFrame +from 
vectice.models.validation_dataset import TestSuiteReturnType + + +# custom test which can be used for dataset validation +def test_dataset_split( + dataset: DataFrame | None, + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + feature_columns: list | None = None, + threshold: float | None = None, +) -> TestSuiteReturnType: + from vectice import Table + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None: + return None + + total_df = len(training_df) + len(testing_df) + + # Create a DataFrame with the results + datasplit_df = pd.DataFrame( + { + "Dataset": ["Train", "Test", "Total"], + "Size": [len(training_df), len(testing_df), total_df], + "Percentage": [ + (len(training_df) / total_df * 100), + (len(testing_df) / total_df * 100), + 100, + ], + } + ) + + table = Table(datasplit_df) + + return TestSuiteReturnType( + properties={}, + tables=[table], + attachments=[], + ) + + +# custom test which can be used for dataset validation +def iqr_and_outliers( + dataset: DataFrame | None = None, + training_df: DataFrame | None = None, + testing_df: DataFrame | None = None, + feature_columns: list | None = None, + target_column: str | None = None, + threshold: float | None = None, +) -> TestSuiteReturnType | None: + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None: + return None + + files = [] + # disable plots showing + plt.ioff() + for column in dataset.select_dtypes(include=[np.number]).columns: + file_name = f"iqr_and_outliers_{column}.png" + + temp_file_path = file_name + + Q1 = dataset[column].quantile(0.25) + Q3 = dataset[column].quantile(0.75) + IQR = Q3 - Q1 + lower_bound = Q1 - 1.5 * IQR + upper_bound = Q3 + 1.5 * IQR + + plt.figure(figsize=(10, 6)) + plt.hist(dataset[column], bins=20, edgecolor="k", alpha=0.7) + plt.axvline( + Q1, color="r", linestyle="--", label=f"Q1 (25th percentile): {Q1:.2f}" + ) + plt.axvline( + Q3, color="b", linestyle="--", label=f"Q3 (75th percentile): {Q3:.2f}" + ) + plt.axvline( + dataset[column].median(), + color="g", + linestyle="-", + label=f"Median: {dataset[column].median():.2f}", + ) + plt.fill_betweenx( + [0, plt.ylim()[1]], Q1, Q3, color="gray", alpha=0.3, label=f"IQR: {IQR:.2f}" + ) + + # Highlight outliers + outliers = dataset[ + (dataset[column] < lower_bound) | (dataset[column] > upper_bound) + ][column] + plt.scatter( + outliers, [0] * len(outliers), color="red", label="Outliers", zorder=5 + ) + + plt.title(f"Histogram with IQR and Outliers for {column}") + plt.xlabel(column) + plt.ylabel("Frequency") + plt.legend() + plt.savefig(temp_file_path, bbox_inches="tight") + files.append(temp_file_path) + + plt.ion() + return TestSuiteReturnType( + properties={}, + tables=[], + attachments=files, + ) + + +# Map the tests to be used +DATA_QUALITY_SUITE_MAP_TEST = { + "dataset_split": test_dataset_split, + "iqr_and_outliers": iqr_and_outliers, + "full_dataset_validation": [ + test_dataset_split, + iqr_and_outliers, + ], +} diff --git a/24.2/samples/validation_suites/master_test_suites.py b/24.2/samples/validation_suites/master_test_suites.py new file mode 100644 index 0000000..e6c8c41 --- /dev/null +++ b/24.2/samples/validation_suites/master_test_suites.py @@ -0,0 +1,23 @@ +# Vectice provided model validation tests +from binary_classification_full_suite import BINARY_CLASSIFICATION_FULL_SUITE_MAP_TEST + +# custom data quality tests +from data_quality_full_suite import ( + test_dataset_split, + iqr_and_outliers, +) + + +# The master test suite file is used to 
map all tests which can be run. +# The tests can be provided by Vectice or custom functions from your test suite modules. +# Vectice uses this configuration to simply identify available tests, when you run +# your validations in your notebook. + +# Accumulation and mapping of all tests to be run +MASTER_FULL_SUITE_MAP_TEST = { + "binary_full_suite": BINARY_CLASSIFICATION_FULL_SUITE_MAP_TEST["binary_full_suite"], + "full_dataset_validation": [ + test_dataset_split, + iqr_and_outliers, + ], +} diff --git a/24.2/samples/validation_suites/regression_full_suite.py b/24.2/samples/validation_suites/regression_full_suite.py new file mode 100644 index 0000000..d27e00e --- /dev/null +++ b/24.2/samples/validation_suites/regression_full_suite.py @@ -0,0 +1,26 @@ +# import the Vectice provided test +from vectice.models.test_library.regression_test import ( + plot_residuals, + r2_score, + explainability, + feature_importance, + target_drift, + prediction_drift, +) + +# Map the tests to be used +REGRESSION_FULL_SUITE_MAP_TEST = { + "roc": plot_residuals, + "cm": r2_score, + "explainability": explainability, + "feature_importance": feature_importance, + "drift": [target_drift, prediction_drift], + "binary_full_suite": [ + plot_residuals, + r2_score, + explainability, + feature_importance, + target_drift, + prediction_drift, + ], +} diff --git a/24.2/samples/validation_suites/time_series_full_suite.py b/24.2/samples/validation_suites/time_series_full_suite.py new file mode 100644 index 0000000..0fd7b0d --- /dev/null +++ b/24.2/samples/validation_suites/time_series_full_suite.py @@ -0,0 +1,24 @@ +# import the Vectice provided time series tests +from vectice.models.test_library.time_series_test import ( + trend_analysis, + seasonality_check, + autocorrelation_test, + stationarity_test, + missing_value_analysis, +) + +# Map the tests to be used +TIME_SERIES_FULL_SUITE_MAP_TEST = { + "trend": trend_analysis, + "seasonality": seasonality_check, + "autocorrelation": autocorrelation_test, + "stationarity": stationarity_test, + "missing_value": missing_value_analysis, + "time_series_full_suite": [ + trend_analysis, + seasonality_check, + autocorrelation_test, + stationarity_test, + missing_value_analysis, + ], +} From 59a31342dced9f6ee904b094a3228e3b5df0a993 Mon Sep 17 00:00:00 2001 From: AidanNell Date: Wed, 10 Jul 2024 09:03:21 +0700 Subject: [PATCH 02/20] updated file structure --- .../binary_classification_full_suite.py | 26 ------ ...cy_full_suite.py => data_privacy_tests.py} | 13 --- ...ty_full_suite.py => data_quality_tests.py} | 11 --- .../validation_suites/master_test_suites.py | 80 +++++++++++++++++-- .../regression_full_suite.py | 26 ------ .../time_series_full_suite.py | 24 ------ 6 files changed, 74 insertions(+), 106 deletions(-) delete mode 100644 24.2/samples/validation_suites/binary_classification_full_suite.py rename 24.2/samples/validation_suites/{data_privacy_full_suite.py => data_privacy_tests.py} (93%) rename 24.2/samples/validation_suites/{data_quality_full_suite.py => data_quality_tests.py} (93%) delete mode 100644 24.2/samples/validation_suites/regression_full_suite.py delete mode 100644 24.2/samples/validation_suites/time_series_full_suite.py diff --git a/24.2/samples/validation_suites/binary_classification_full_suite.py b/24.2/samples/validation_suites/binary_classification_full_suite.py deleted file mode 100644 index 8427247..0000000 --- a/24.2/samples/validation_suites/binary_classification_full_suite.py +++ /dev/null @@ -1,26 +0,0 @@ -# import the Vectice provided binary 
classification tests -from vectice.models.test_library.binary_classification_test import ( - plot_roc_curve, - conf_matrix, - explainability, - feature_importance, - label_drift, - prediction_drift, -) - -# Map the tests to be used -BINARY_CLASSIFICATION_FULL_SUITE_MAP_TEST = { - "roc": plot_roc_curve, - "cm": conf_matrix, - "explainability": explainability, - "feature_importance": feature_importance, - "drift": [label_drift, prediction_drift], - "binary_full_suite": [ - plot_roc_curve, - conf_matrix, - explainability, - feature_importance, - label_drift, - prediction_drift, - ], -} diff --git a/24.2/samples/validation_suites/data_privacy_full_suite.py b/24.2/samples/validation_suites/data_privacy_tests.py similarity index 93% rename from 24.2/samples/validation_suites/data_privacy_full_suite.py rename to 24.2/samples/validation_suites/data_privacy_tests.py index 4fd2215..90d851f 100644 --- a/24.2/samples/validation_suites/data_privacy_full_suite.py +++ b/24.2/samples/validation_suites/data_privacy_tests.py @@ -146,16 +146,3 @@ def sensitive_data_type_check( tables=[table], attachments=[], ) - - -# Map the tests to be used -DATA_PRIVACY_SUITE_MAP_TEST = { - "sensitive_data_check": sensitive_data_check, - "pii_check": pii_check, - "sensitive_data_type_check": sensitive_data_type_check, - "data_privacy_full_suite": [ - sensitive_data_check, - pii_check, - sensitive_data_type_check, - ], -} diff --git a/24.2/samples/validation_suites/data_quality_full_suite.py b/24.2/samples/validation_suites/data_quality_tests.py similarity index 93% rename from 24.2/samples/validation_suites/data_quality_full_suite.py rename to 24.2/samples/validation_suites/data_quality_tests.py index b946b94..05b3ae5 100644 --- a/24.2/samples/validation_suites/data_quality_full_suite.py +++ b/24.2/samples/validation_suites/data_quality_tests.py @@ -114,14 +114,3 @@ def iqr_and_outliers( tables=[], attachments=files, ) - - -# Map the tests to be used -DATA_QUALITY_SUITE_MAP_TEST = { - "dataset_split": test_dataset_split, - "iqr_and_outliers": iqr_and_outliers, - "full_dataset_validation": [ - test_dataset_split, - iqr_and_outliers, - ], -} diff --git a/24.2/samples/validation_suites/master_test_suites.py b/24.2/samples/validation_suites/master_test_suites.py index e6c8c41..25c9ea0 100644 --- a/24.2/samples/validation_suites/master_test_suites.py +++ b/24.2/samples/validation_suites/master_test_suites.py @@ -1,22 +1,90 @@ -# Vectice provided model validation tests -from binary_classification_full_suite import BINARY_CLASSIFICATION_FULL_SUITE_MAP_TEST +# import the Vectice provided probability of default tests +from vectice.models.test_library.probability_of_default_test import ( + plot_roc_curve, + conf_matrix, + explainability, + feature_importance, + label_drift, + prediction_drift, +) + +# import the Vectice provided regression tests +from vectice.models.test_library.regression_test import ( + plot_residuals, + r2_score, + explainability, + feature_importance, + target_drift, + prediction_drift, +) + +# import the Vectice provided time series tests +from vectice.models.test_library.time_series_test import ( + trend_analysis, + seasonality_check, + autocorrelation_test, + stationarity_test, + missing_value_analysis, +) + # custom data quality tests -from data_quality_full_suite import ( +from data_quality_tests import ( test_dataset_split, iqr_and_outliers, ) +# Map the tests to be used for regression +REGRESSION_FULL_SUITE_MAP_TEST = { + "roc": plot_residuals, + "cm": r2_score, + "explainability": explainability, + 
"feature_importance": feature_importance, + "drift": [target_drift, prediction_drift], + "binary_full_suite": [ + plot_residuals, + r2_score, + explainability, + feature_importance, + target_drift, + prediction_drift, + ], +} + +# Map the tests to be used for time series +TIME_SERIES_FULL_SUITE_MAP_TEST = { + "trend": trend_analysis, + "seasonality": seasonality_check, + "autocorrelation": autocorrelation_test, + "stationarity": stationarity_test, + "missing_value": missing_value_analysis, + "time_series_full_suite": [ + trend_analysis, + seasonality_check, + autocorrelation_test, + stationarity_test, + missing_value_analysis, + ], +} + + # The master test suite file is used to map all tests which can be run. # The tests can be provided by Vectice or custom functions from your test suite modules. # Vectice uses this configuration to simply identify available tests, when you run # your validations in your notebook. # Accumulation and mapping of all tests to be run -MASTER_FULL_SUITE_MAP_TEST = { - "binary_full_suite": BINARY_CLASSIFICATION_FULL_SUITE_MAP_TEST["binary_full_suite"], - "full_dataset_validation": [ +MASTER_SUITE_MAP_TEST = { + "probability_of_default_validation": [ + plot_roc_curve, + conf_matrix, + explainability, + feature_importance, + label_drift, + prediction_drift, + ], + "data_quality": [ test_dataset_split, iqr_and_outliers, ], diff --git a/24.2/samples/validation_suites/regression_full_suite.py b/24.2/samples/validation_suites/regression_full_suite.py deleted file mode 100644 index d27e00e..0000000 --- a/24.2/samples/validation_suites/regression_full_suite.py +++ /dev/null @@ -1,26 +0,0 @@ -# import the Vectice provided test -from vectice.models.test_library.regression_test import ( - plot_residuals, - r2_score, - explainability, - feature_importance, - target_drift, - prediction_drift, -) - -# Map the tests to be used -REGRESSION_FULL_SUITE_MAP_TEST = { - "roc": plot_residuals, - "cm": r2_score, - "explainability": explainability, - "feature_importance": feature_importance, - "drift": [target_drift, prediction_drift], - "binary_full_suite": [ - plot_residuals, - r2_score, - explainability, - feature_importance, - target_drift, - prediction_drift, - ], -} diff --git a/24.2/samples/validation_suites/time_series_full_suite.py b/24.2/samples/validation_suites/time_series_full_suite.py deleted file mode 100644 index 0fd7b0d..0000000 --- a/24.2/samples/validation_suites/time_series_full_suite.py +++ /dev/null @@ -1,24 +0,0 @@ -# import the Vectice provided time series tests -from vectice.models.test_library.time_series_test import ( - trend_analysis, - seasonality_check, - autocorrelation_test, - stationarity_test, - missing_value_analysis, -) - -# Map the tests to be used -TIME_SERIES_FULL_SUITE_MAP_TEST = { - "trend": trend_analysis, - "seasonality": seasonality_check, - "autocorrelation": autocorrelation_test, - "stationarity": stationarity_test, - "missing_value": missing_value_analysis, - "time_series_full_suite": [ - trend_analysis, - seasonality_check, - autocorrelation_test, - stationarity_test, - missing_value_analysis, - ], -} From a8fa33fee8549e1ccdb73fab3bc7a9558af825d4 Mon Sep 17 00:00:00 2001 From: AidanNell Date: Wed, 10 Jul 2024 09:10:01 +0700 Subject: [PATCH 03/20] fixed suite map in master file --- .../validation_suites/master_test_suites.py | 41 +++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/24.2/samples/validation_suites/master_test_suites.py b/24.2/samples/validation_suites/master_test_suites.py index 25c9ea0..6370541 
100644 --- a/24.2/samples/validation_suites/master_test_suites.py +++ b/24.2/samples/validation_suites/master_test_suites.py @@ -1,4 +1,4 @@ -# import the Vectice provided probability of default tests +# import the Vectice provided probability of default validation tests from vectice.models.test_library.probability_of_default_test import ( plot_roc_curve, conf_matrix, @@ -8,7 +8,7 @@ prediction_drift, ) -# import the Vectice provided regression tests +# import the Vectice provided regression validation tests from vectice.models.test_library.regression_test import ( plot_residuals, r2_score, @@ -18,7 +18,7 @@ prediction_drift, ) -# import the Vectice provided time series tests +# import the Vectice provided time series validation tests from vectice.models.test_library.time_series_test import ( trend_analysis, seasonality_check, @@ -28,14 +28,20 @@ ) -# custom data quality tests +# custom data quality validation tests from data_quality_tests import ( test_dataset_split, iqr_and_outliers, ) +# custom data privacy validation tests +from data_privacy_tests import ( + sensitive_data_check, + sensitive_data_type_check, + pii_check, +) -# Map the tests to be used for regression +# Map the tests to be used for regression validation REGRESSION_FULL_SUITE_MAP_TEST = { "roc": plot_residuals, "cm": r2_score, @@ -52,7 +58,7 @@ ], } -# Map the tests to be used for time series +# Map the tests to be used for time series validation TIME_SERIES_FULL_SUITE_MAP_TEST = { "trend": trend_analysis, "seasonality": seasonality_check, @@ -68,13 +74,34 @@ ], } +# Map the tests to be used for data quality +DATA_QUALITY_SUITE_MAP_TEST = { + "dataset_split": test_dataset_split, + "iqr_and_outliers": iqr_and_outliers, + "full_dataset_validation": [ + test_dataset_split, + iqr_and_outliers, + ], +} + +# Map the tests to be used for data privacy validation +DATA_PRIVACY_SUITE_MAP_TEST = { + "sensitive_data_check": sensitive_data_check, + "pii_check": pii_check, + "sensitive_data_type_check": sensitive_data_type_check, + "data_privacy_full_suite": [ + sensitive_data_check, + pii_check, + sensitive_data_type_check, + ], +} # The master test suite file is used to map all tests which can be run. # The tests can be provided by Vectice or custom functions from your test suite modules. # Vectice uses this configuration to simply identify available tests, when you run # your validations in your notebook. -# Accumulation and mapping of all tests to be run +# Accumulation and mapping of all validation tests to be run MASTER_SUITE_MAP_TEST = { "probability_of_default_validation": [ plot_roc_curve, From 79c2591c3fd84db8902c088d22eaf8ff7c0dc7a7 Mon Sep 17 00:00:00 2001 From: AidanNell Date: Wed, 10 Jul 2024 09:44:20 +0700 Subject: [PATCH 04/20] updated naming --- 24.2/samples/validation_suites/master_test_suites.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/24.2/samples/validation_suites/master_test_suites.py b/24.2/samples/validation_suites/master_test_suites.py index 6370541..b79944b 100644 --- a/24.2/samples/validation_suites/master_test_suites.py +++ b/24.2/samples/validation_suites/master_test_suites.py @@ -1,5 +1,5 @@ # import the Vectice provided probability of default validation tests -from vectice.models.test_library.probability_of_default_test import ( +from vectice.models.test_library.binary_classification_test import ( plot_roc_curve, conf_matrix, explainability, @@ -102,8 +102,8 @@ # your validations in your notebook. 
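# Illustrative sketch (not a documented Vectice API): one way a mapped suite can
# be run directly from a notebook. The names `raw_df`, `train_df`, `test_df` and
# the "TARGET" column are assumptions from your own pipeline; logging the returned
# TestSuiteReturnType objects back to Vectice is not shown here.
#
#     results = []
#     for test in CUSTOM_TEST_PD_MODEL["data_quality"]:
#         result = test(
#             dataset=raw_df,
#             training_df=train_df,
#             testing_df=test_df,
#             target_column="TARGET",
#         )
#         if result is not None:
#             results.append(result)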
# Accumulation and mapping of all validation tests to be run -MASTER_SUITE_MAP_TEST = { - "probability_of_default_validation": [ +CUSTOM_TEST_PD_MODEL = { + "binary_suite": [ plot_roc_curve, conf_matrix, explainability, From a83eb1ef3b668c66446ee875cd4389f4ef5fd87b Mon Sep 17 00:00:00 2001 From: BDaversa Date: Tue, 9 Jul 2024 22:28:26 -0700 Subject: [PATCH 05/20] add more file --- .../samples/validation_suites/PiML_wrapper.py | 0 .../samples/validation_suites/custom_tests.py | 56 +++++++++++++++++++ .../validation_suites/master_test_suites.py | 7 +++ 3 files changed, 63 insertions(+) create mode 100644 24.2/samples/validation_suites/PiML_wrapper.py create mode 100644 24.2/samples/validation_suites/custom_tests.py diff --git a/24.2/samples/validation_suites/PiML_wrapper.py b/24.2/samples/validation_suites/PiML_wrapper.py new file mode 100644 index 0000000..e69de29 diff --git a/24.2/samples/validation_suites/custom_tests.py b/24.2/samples/validation_suites/custom_tests.py new file mode 100644 index 0000000..83510ad --- /dev/null +++ b/24.2/samples/validation_suites/custom_tests.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any, Dict + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns + + +if TYPE_CHECKING: + from matplotlib.container import BarContainer + from numpy import ndarray + from numpy.typing import ArrayLike + from pandas import DataFrame + + from vectice.models.validation import TestSuiteReturnType + +_logger = logging.getLogger(__name__) + +def plot_correlation_matrix( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + subset_columns = internal_parameters.get("subset_columns", [target_column] + [col for col in training_df.columns[:10] if col != "TARGET"]) + cmap = internal_parameters.get("cmap", "Blues") + + # Select subset of columns + training_df = training_df[subset_columns] + + # Calculate the correlation matrix + corr_matrix = training_df.corr() + + # Plot the correlation matrix + plt.figure(figsize=(10, 8)) + sns.heatmap(corr_matrix, annot=True, cmap=cmap, fmt=".2f", annot_kws={"fontsize": 12}, cbar=True) + plt.title("Correlation Matrix") + + # Save the plot + file_path = "Correlation_matrix_plot.png" + plt.savefig(file_path) + plt.close() + + return TestSuiteReturnType( + metrics={}, + properties={}, + tables=[], + attachments=[file_path], + ) \ No newline at end of file diff --git a/24.2/samples/validation_suites/master_test_suites.py b/24.2/samples/validation_suites/master_test_suites.py index b79944b..4a6123d 100644 --- a/24.2/samples/validation_suites/master_test_suites.py +++ b/24.2/samples/validation_suites/master_test_suites.py @@ -41,6 +41,10 @@ pii_check, ) +from custom_tests import ( + plot_correlation_matrix +) + # Map the tests to be used for regression validation REGRESSION_FULL_SUITE_MAP_TEST = { "roc": plot_residuals, @@ -115,4 +119,7 @@ test_dataset_split, iqr_and_outliers, ], + "custom":[ + plot_correlation_matrix, + ] } From 433bf30132fb31b24e1e9b549df724cbed561e11 Mon Sep 17 00:00:00 2001 From: BDaversa Date: Thu, 11 Jul 2024 10:58:37 -0700 Subject: [PATCH 06/20] change name --- ..._tests.py => correlation_matrix_module.py} | 0 ...ivacy_tests.py => 
data_privacy_modules.py} | 0 ...ality_tests.py => data_quality_modules.py} | 0 .../master_config_test_suites.py | 64 +++++++++ .../validation_suites/master_test_suites.py | 125 ------------------ 5 files changed, 64 insertions(+), 125 deletions(-) rename 24.2/samples/validation_suites/{custom_tests.py => correlation_matrix_module.py} (100%) rename 24.2/samples/validation_suites/{data_privacy_tests.py => data_privacy_modules.py} (100%) rename 24.2/samples/validation_suites/{data_quality_tests.py => data_quality_modules.py} (100%) create mode 100644 24.2/samples/validation_suites/master_config_test_suites.py delete mode 100644 24.2/samples/validation_suites/master_test_suites.py diff --git a/24.2/samples/validation_suites/custom_tests.py b/24.2/samples/validation_suites/correlation_matrix_module.py similarity index 100% rename from 24.2/samples/validation_suites/custom_tests.py rename to 24.2/samples/validation_suites/correlation_matrix_module.py diff --git a/24.2/samples/validation_suites/data_privacy_tests.py b/24.2/samples/validation_suites/data_privacy_modules.py similarity index 100% rename from 24.2/samples/validation_suites/data_privacy_tests.py rename to 24.2/samples/validation_suites/data_privacy_modules.py diff --git a/24.2/samples/validation_suites/data_quality_tests.py b/24.2/samples/validation_suites/data_quality_modules.py similarity index 100% rename from 24.2/samples/validation_suites/data_quality_tests.py rename to 24.2/samples/validation_suites/data_quality_modules.py diff --git a/24.2/samples/validation_suites/master_config_test_suites.py b/24.2/samples/validation_suites/master_config_test_suites.py new file mode 100644 index 0000000..0f13a6a --- /dev/null +++ b/24.2/samples/validation_suites/master_config_test_suites.py @@ -0,0 +1,64 @@ +# import the Vectice provided probability of default validation tests +from vectice.models.test_library.binary_classification_test import ( + plot_roc_curve, + conf_matrix, + explainability, + feature_importance, + label_drift, + prediction_drift, +) + + +# custom data quality validation tests +from data_quality_modules import ( + test_dataset_split, + iqr_and_outliers, +) + +# custom data privacy validation tests +from data_privacy_modules import ( + sensitive_data_check, + sensitive_data_type_check, + pii_check, +) + +from correlation_matrix_module import ( + plot_correlation_matrix +) + + +# The master test suite file is used to map all suite of test which can be run. +# The tests can be provided by Vectice or custom functions from your modules. +# Vectice uses this configuration to simply identify and bundle available tests into suite, when you run +# your validations in your notebook. 
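+# Illustrative sketch: any callable that returns a TestSuiteReturnType can be
+# bundled into one of the suites below. For example, assuming a hypothetical
+# `my_stability_check` defined in your own module:
+#
+#     from my_custom_module import my_stability_check  # hypothetical import
+#
+#     PD_model_suite["custom"].append(my_stability_check)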
+ +# Accumulation and mapping of all validation tests to be run for the PD model suite +PD_model_suite= { + "binary_suite": [ + plot_roc_curve, + conf_matrix, + explainability, + feature_importance, + label_drift, + prediction_drift, + ], + "data_quality": [ + test_dataset_split, + iqr_and_outliers, + ], + "custom":[ + plot_correlation_matrix, + ] +} + +# Map the tests to be used for data privacy validation +Robustness_suite = { + "sensitive_data_check": sensitive_data_check, + "pii_check": pii_check, + "sensitive_data_type_check": sensitive_data_type_check, + "data_privacy_full_suite": [ + sensitive_data_check, + pii_check, + sensitive_data_type_check, + ], +} diff --git a/24.2/samples/validation_suites/master_test_suites.py b/24.2/samples/validation_suites/master_test_suites.py deleted file mode 100644 index 4a6123d..0000000 --- a/24.2/samples/validation_suites/master_test_suites.py +++ /dev/null @@ -1,125 +0,0 @@ -# import the Vectice provided probability of default validation tests -from vectice.models.test_library.binary_classification_test import ( - plot_roc_curve, - conf_matrix, - explainability, - feature_importance, - label_drift, - prediction_drift, -) - -# import the Vectice provided regression validation tests -from vectice.models.test_library.regression_test import ( - plot_residuals, - r2_score, - explainability, - feature_importance, - target_drift, - prediction_drift, -) - -# import the Vectice provided time series validation tests -from vectice.models.test_library.time_series_test import ( - trend_analysis, - seasonality_check, - autocorrelation_test, - stationarity_test, - missing_value_analysis, -) - - -# custom data quality validation tests -from data_quality_tests import ( - test_dataset_split, - iqr_and_outliers, -) - -# custom data privacy validation tests -from data_privacy_tests import ( - sensitive_data_check, - sensitive_data_type_check, - pii_check, -) - -from custom_tests import ( - plot_correlation_matrix -) - -# Map the tests to be used for regression validation -REGRESSION_FULL_SUITE_MAP_TEST = { - "roc": plot_residuals, - "cm": r2_score, - "explainability": explainability, - "feature_importance": feature_importance, - "drift": [target_drift, prediction_drift], - "binary_full_suite": [ - plot_residuals, - r2_score, - explainability, - feature_importance, - target_drift, - prediction_drift, - ], -} - -# Map the tests to be used for time series validation -TIME_SERIES_FULL_SUITE_MAP_TEST = { - "trend": trend_analysis, - "seasonality": seasonality_check, - "autocorrelation": autocorrelation_test, - "stationarity": stationarity_test, - "missing_value": missing_value_analysis, - "time_series_full_suite": [ - trend_analysis, - seasonality_check, - autocorrelation_test, - stationarity_test, - missing_value_analysis, - ], -} - -# Map the tests to be used for data quality -DATA_QUALITY_SUITE_MAP_TEST = { - "dataset_split": test_dataset_split, - "iqr_and_outliers": iqr_and_outliers, - "full_dataset_validation": [ - test_dataset_split, - iqr_and_outliers, - ], -} - -# Map the tests to be used for data privacy validation -DATA_PRIVACY_SUITE_MAP_TEST = { - "sensitive_data_check": sensitive_data_check, - "pii_check": pii_check, - "sensitive_data_type_check": sensitive_data_type_check, - "data_privacy_full_suite": [ - sensitive_data_check, - pii_check, - sensitive_data_type_check, - ], -} - -# The master test suite file is used to map all tests which can be run. -# The tests can be provided by Vectice or custom functions from your test suite modules. 
-# Vectice uses this configuration to simply identify available tests, when you run -# your validations in your notebook. - -# Accumulation and mapping of all validation tests to be run -CUSTOM_TEST_PD_MODEL = { - "binary_suite": [ - plot_roc_curve, - conf_matrix, - explainability, - feature_importance, - label_drift, - prediction_drift, - ], - "data_quality": [ - test_dataset_split, - iqr_and_outliers, - ], - "custom":[ - plot_correlation_matrix, - ] -} From c98e12f3c7061887c6e2f7415a0e438d5fadef25 Mon Sep 17 00:00:00 2001 From: BDaversa <130710586+BDaversa@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:00:06 -0700 Subject: [PATCH 07/20] Create README.md --- 24.2/samples/validation_suites/README.md | 30 ++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 24.2/samples/validation_suites/README.md diff --git a/24.2/samples/validation_suites/README.md b/24.2/samples/validation_suites/README.md new file mode 100644 index 0000000..90e15ec --- /dev/null +++ b/24.2/samples/validation_suites/README.md @@ -0,0 +1,30 @@ +## All test modules below can be added to your test suite to by run on any models or datasets +| **Category** | **Test Name** | **Function** | +|------------------------------|----------------------------------|--------------------------------------| +| **Classification Tests** | ROC Curve | `plot_roc_curve` | +| | Confusion Matrix | `conf_matrix` | +| | Explainability | `explainability` | +| | Feature Importance | `feature_importance` | +| | Label Drift | `label_drift` | +| | Prediction Drift | `prediction_drift` | +| | **Full Binary Classification Test** | `plot_roc_curve`, `conf_matrix`, `explainability`, `feature_importance`, `label_drift`, `prediction_drift` | +| **Data Privacy Tests** | Sensitive Data Check | `sensitive_data_check` | +| | PII Check | `pii_check` | +| | Sensitive Data Type Check | `sensitive_data_type_check` | +| | **Full Data Privacy Test** | `sensitive_data_check`, `pii_check`, `sensitive_data_type_check` | +| **Data Quality Tests** | Dataset Split Validation | `test_dataset_split` | +| | IQR and Outliers | `iqr_and_outliers` | +| | **Full Dataset Quality Test** | `test_dataset_split`, `iqr_and_outliers` | +| **Regression Tests** | Residuals Plot | `plot_residuals` | +| | R² Score | `r2_score` | +| | Explainability | `explainability` | +| | Feature Importance | `feature_importance` | +| | Target Drift | `target_drift` | +| | Prediction Drift | `prediction_drift` | +| | **Full Regression Test** | `plot_residuals`, `r2_score`, `explainability`, `feature_importance`, `target_drift`, `prediction_drift` | +| **Time Series Tests** | Trend Analysis | `trend_analysis` | +| | Seasonality Check | `seasonality_check` | +| | Autocorrelation Test | `autocorrelation_test` | +| | Stationarity Test | `stationarity_test` | +| | Missing Value Analysis | `missing_value_analysis` | +| | **Full Time Series Test** | `trend_analysis`, `seasonality_check`, `autocorrelation_test`, `stationarity_test`, `missing_value_analysis` | From 726ade4a5e071a26ef36ce437ae23dc55a4727cb Mon Sep 17 00:00:00 2001 From: BDaversa <130710586+BDaversa@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:00:54 -0700 Subject: [PATCH 08/20] Update README.md --- 24.2/samples/validation_suites/README.md | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/24.2/samples/validation_suites/README.md b/24.2/samples/validation_suites/README.md index 90e15ec..ab6fba0 100644 --- a/24.2/samples/validation_suites/README.md +++ 
b/24.2/samples/validation_suites/README.md @@ -14,17 +14,12 @@ | | **Full Data Privacy Test** | `sensitive_data_check`, `pii_check`, `sensitive_data_type_check` | | **Data Quality Tests** | Dataset Split Validation | `test_dataset_split` | | | IQR and Outliers | `iqr_and_outliers` | -| | **Full Dataset Quality Test** | `test_dataset_split`, `iqr_and_outliers` | +| | **Full Dataset Quality suiteest** | `test_dataset_split`, `iqr_and_outliers` | | **Regression Tests** | Residuals Plot | `plot_residuals` | | | R² Score | `r2_score` | | | Explainability | `explainability` | | | Feature Importance | `feature_importance` | | | Target Drift | `target_drift` | | | Prediction Drift | `prediction_drift` | -| | **Full Regression Test** | `plot_residuals`, `r2_score`, `explainability`, `feature_importance`, `target_drift`, `prediction_drift` | -| **Time Series Tests** | Trend Analysis | `trend_analysis` | -| | Seasonality Check | `seasonality_check` | -| | Autocorrelation Test | `autocorrelation_test` | -| | Stationarity Test | `stationarity_test` | -| | Missing Value Analysis | `missing_value_analysis` | -| | **Full Time Series Test** | `trend_analysis`, `seasonality_check`, `autocorrelation_test`, `stationarity_test`, `missing_value_analysis` | +| | **Full Regression suite** | `plot_residuals`, `r2_score`, `explainability`, `feature_importance`, `target_drift`, `prediction_drift` | + From e70a6f11e9c73bad43c71074787a0fec8d92f86e Mon Sep 17 00:00:00 2001 From: BDaversa Date: Thu, 11 Jul 2024 13:03:43 -0700 Subject: [PATCH 09/20] package modules --- .../samples/validation_suites/PiML_wrapper.py | 523 ++++++++++++++++++ .../master_config_test_suites.py | 6 +- .../test_modules/correlation_matrix_module.py | 56 ++ .../test_modules/data_privacy_modules.py | 148 +++++ .../test_modules/data_quality_modules.py | 116 ++++ 5 files changed, 846 insertions(+), 3 deletions(-) create mode 100644 24.2/samples/validation_suites/test_modules/correlation_matrix_module.py create mode 100644 24.2/samples/validation_suites/test_modules/data_privacy_modules.py create mode 100644 24.2/samples/validation_suites/test_modules/data_quality_modules.py diff --git a/24.2/samples/validation_suites/PiML_wrapper.py b/24.2/samples/validation_suites/PiML_wrapper.py index e69de29..5ec37db 100644 --- a/24.2/samples/validation_suites/PiML_wrapper.py +++ b/24.2/samples/validation_suites/PiML_wrapper.py @@ -0,0 +1,523 @@ +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any, Dict + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +import shap +from scipy.stats import chi2_contingency, ks_2samp +from sklearn.metrics import auc, confusion_matrix, precision_score, recall_score, roc_curve + +if TYPE_CHECKING: + from matplotlib.container import BarContainer + from numpy import ndarray + from numpy.typing import ArrayLike + from pandas import DataFrame + + from vectice.models.validation import TestSuiteReturnType + +_logger = logging.getLogger(__name__) + + +def plot_roc_curve( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {"train_color": "green", "test_color": "blue", "threshold": 0.5}, +) -> TestSuiteReturnType | None: + from vectice.models.validation import TestSuiteReturnType + + X_train = training_df.drop(columns=[target_column]) + X_test = testing_df.drop(columns=[target_column]) + 
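+    # Score both splits with the fitted predictor; precomputed probability arrays,
+    # when supplied, replace these scores below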
training_prediction_proba = predictor.predict_proba(X_train)[:, 1] + testing_prediction_proba = predictor.predict_proba(X_test)[:, 1] + + if predict_proba_train is not None: + training_prediction_proba = predict_proba_train + + if predict_proba_test is not None: + testing_prediction_proba = predict_proba_test + + fpr_train, tpr_train, _ = roc_curve(training_df[target_column], training_prediction_proba) + roc_auc_train = auc(fpr_train, tpr_train) + + fpr_test, tpr_test, _ = roc_curve(testing_df[target_column], testing_prediction_proba) + roc_auc_test = auc(fpr_test, tpr_test) + + file_path = "ROC_CURVE.png" + + plt.figure(figsize=(8, 6)) + plt.plot( + fpr_train, + tpr_train, + color=internal_parameters["train_color"], + linestyle="--", + label=f"Train ROC curve (AUC = {roc_auc_train:.2f})", + ) + plt.plot( + fpr_test, + tpr_test, + color=internal_parameters["test_color"], + label=f"Test ROC curve (AUC = {roc_auc_test:.2f})", + ) + plt.plot([0, 1], [0, 1], color="red", linestyle="--") + plt.xlabel("False Positive Rate") + plt.ylabel("True Positive Rate") + plt.title("Receiver Operating Characteristic (ROC) Curve") + plt.legend() + plt.grid(True) + plt.savefig(file_path) + plt.close() + + return TestSuiteReturnType( + metrics={"_ROC_auc_train": roc_auc_train, "_ROC_auc_test": roc_auc_test}, + properties={}, + tables=[], + attachments=[file_path], + ) + + +def conf_matrix( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {"threshold": 0.5, "cmap": "Blues"}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + + threshold = internal_parameters["threshold"] + cmap = internal_parameters.get("cmap", "Blues") + + X_test = testing_df.drop(columns=[target_column]) + testing_prediction_proba = predictor.predict_proba(X_test)[:, 1] + + if predict_proba_test is not None: + testing_prediction_proba = predict_proba_test + + testing_prediction = (testing_prediction_proba >= threshold).astype(int) + + cm = confusion_matrix(testing_df[target_column], testing_prediction) + total_samples = np.sum(cm) + + precision = precision_score(testing_df[target_column], testing_prediction) + recall = recall_score(testing_df[target_column], testing_prediction) + + # Plot confusion matrix + plt.figure(figsize=(10, 8)) + sns.heatmap(cm, annot=True, cmap=cmap, fmt="d", annot_kws={"fontsize": 12}, cbar=False) + for i in range(len(cm)): + for j in range(len(cm)): + plt.text( + j + 0.5, + i + 0.75, + f"{cm[i][j]/total_samples*100:.2f}%", + ha="center", + va="center", + color="black", + fontsize=12, + ) + plt.xlabel("Predicted Label") + plt.ylabel("True Label") + plt.title(f"Confusion Matrix\nPrecision: {precision:.2f}, Recall: {recall:.2f}") + + # Save the plot + file_path = "Confusion_matrix_plot.png" + plt.savefig(file_path) + plt.close() + + return TestSuiteReturnType( + metrics={"_precision_test": precision, "_recall_test": recall}, + properties={"Threshold": threshold}, + tables=[], + attachments=[file_path], + ) + + +def explainability( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + + explainer = shap.Explainer(predictor, training_df.drop(columns=[target_column])) + 
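+    # Compute SHAP values on the first 1,000 training rows only, to keep the explanation tractable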
shap_values = explainer(training_df.drop(columns=[target_column]).head(1000)) + shap.summary_plot( + shap_values[:, :, 0], training_df.drop(columns=[target_column]).head(1000), max_display=10, show=False + ) + summary_plot_path = "SHAP_summary_plot.png" + plt.savefig(summary_plot_path, bbox_inches="tight") + plt.close() + + return TestSuiteReturnType(metrics={}, properties={}, tables=[], attachments=[summary_plot_path]) + + +def feature_importance( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + + explainer = shap.Explainer(predictor, training_df.drop(columns=[target_column])) + shap_values = explainer(training_df.drop(columns=[target_column]).head(1000)) + clustering = shap.utils.hclust( + training_df.drop(columns=[target_column]).head(1000), training_df[target_column].head(1000) + ) + shap.plots.bar(shap_values[:, :, 0], clustering=clustering, max_display=10, show=False) + + feature_importance_path = "feature_importance.png" + plt.savefig(feature_importance_path, bbox_inches="tight") + plt.close() + + return TestSuiteReturnType(metrics={}, properties={}, tables=[], attachments=[feature_importance_path]) + + +def cramers_v_score(x: ndarray[Any, Any], y: ndarray[Any, Any]) -> float: + + min_length = min(len(x), len(y), 4000) + x = x[:min_length] + y = y[:min_length] + confusion_matrix = pd.crosstab(x, y) + chi2 = chi2_contingency(confusion_matrix)[0] + n = confusion_matrix.sum().sum() + phi2 = chi2 / n + r, k = confusion_matrix.shape + phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1)) + rcorr = r - ((r - 1) ** 2) / (n - 1) + kcorr = k - ((k - 1) ** 2) / (n - 1) + return np.sqrt(phi2corr / min((kcorr - 1), (rcorr - 1))) + + +def ks_score(x: ndarray[Any, Any], y: ndarray[Any, Any]) -> float: + min_length = min(len(x), len(y), 4000) + x = x[:min_length] + y = y[:min_length] + ks_statistic, _ = ks_2samp(x, y) + + return ks_statistic + + +def prediction_drift( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + threshold: float, + internal_parameters: Dict[str, Any] = {}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + + X_train = training_df.drop(columns=[target_column]) + X_test = testing_df.drop(columns=[target_column]) + training_prediction_proba = predictor.predict_proba(X_train)[:, 1] + testing_prediction_proba = predictor.predict_proba(X_test)[:, 1] + + if predict_proba_train is not None: + training_prediction_proba = predict_proba_train + + if predict_proba_test is not None: + testing_prediction_proba = predict_proba_test + + train_predictions = np.array(training_prediction_proba) + test_predictions = np.array(testing_prediction_proba) + + light_red = "#FF8A80" # Light Red + darker_blue = "#1565C0" # Darker Blue + sns.set_palette([darker_blue, light_red]) + + _, ax = plt.subplots(figsize=(8, 6)) + + sns.kdeplot(train_predictions, color=light_red, label="Train Predictions", fill=True) + sns.kdeplot(test_predictions, color=darker_blue, label="Test Predictions", fill=True) + + # Plot vertical lines for means using the specified colors + ax.axvline( # pyright: ignore[reportAttributeAccessIssue] + np.mean(train_predictions), # pyright: ignore[reportArgumentType] + 
color=light_red, + linestyle="--", + label="Train Mean", + ) + ax.axvline( # pyright: ignore[reportAttributeAccessIssue] + np.mean(test_predictions), # pyright: ignore[reportArgumentType] + color=darker_blue, + linestyle="--", + label="Test Mean", + ) + + plt.xlabel("Predictions") + plt.ylabel("Density") + plt.title("Prediction Drift Plot (Kolmogorov-Smirnov drift score)") + plt.legend() + plt.grid(True) + path = "Prediction_drift.png" + + # Calculate and print drift score + drift_score = ks_score(train_predictions, test_predictions) + + # Set text position at the top + text_x = 0.5 + text_y = 0.95 + if drift_score < 0.1: + score_color = "green" + elif 0.1 <= drift_score <= 0.2: + score_color = "orange" + else: + score_color = "red" + + plt.text( + text_x, + text_y, + f"Drift score = {drift_score:.2f}", + ha="center", + va="top", + color=score_color, + transform=ax.transAxes, # pyright: ignore[reportAttributeAccessIssue] + ) + + plt.savefig(path, bbox_inches="tight") + plt.close() + + return TestSuiteReturnType( + metrics={}, properties={"_prediction_drift_score": drift_score}, tables=[], attachments=[path] + ) + + +def label_drift( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + + train_labels = np.array(training_df[target_column]) + test_labels = np.array(testing_df[target_column]) + + light_red = "#FF8A80" # Light Red + darker_blue = "#1565C0" # Darker Blue + sns.set_palette([darker_blue, light_red]) + + _, ax = plt.subplots(figsize=(8, 6)) + + bar_width = 0.35 + index = np.arange(2) + + train_counts = [np.sum(train_labels == 0) / len(train_labels), np.sum(train_labels == 1) / len(train_labels)] + test_counts = [np.sum(test_labels == 0) / len(test_labels), np.sum(test_labels == 1) / len(test_labels)] + + train_bar = ax.bar( # pyright: ignore[reportAttributeAccessIssue] + index, train_counts, bar_width, label="Train Labels" + ) + test_bar = ax.bar( # pyright: ignore[reportAttributeAccessIssue] + index + bar_width, test_counts, bar_width, label="Test Labels" + ) + + ax.set_xlabel("Labels") # pyright: ignore[reportAttributeAccessIssue] + ax.set_ylabel("Frequency") # pyright: ignore[reportAttributeAccessIssue] + ax.set_title("Label Drift Plot (Cramer's V drift score)") # pyright: ignore[reportAttributeAccessIssue] + ax.set_xticks(index + bar_width / 2) # pyright: ignore[reportAttributeAccessIssue] + ax.set_xticklabels(["0", "1"]) # pyright: ignore[reportAttributeAccessIssue] + ax.legend() # pyright: ignore[reportAttributeAccessIssue] + + def autolabel(bars: BarContainer): + """Attach a text label above each bar in *bars*, displaying its height.""" + for bar in bars: + height = bar.get_height() + ax.annotate( # pyright: ignore[reportAttributeAccessIssue] + f"{height:.2f}", + xy=(bar.get_x() + bar.get_width() / 2, height), + xytext=(0, 3), + textcoords="offset points", + ha="center", + va="bottom", + ) + + autolabel(train_bar) + autolabel(test_bar) + + drift_score = cramers_v_score(train_labels, test_labels) + if drift_score < 0.1: + score_color = "green" + elif 0.1 <= drift_score <= 0.2: + score_color = "orange" + else: + score_color = "red" + + ax.text( # pyright: ignore[reportAttributeAccessIssue] + 0.5, + 0.95, + f"Drift score = {drift_score:.2f}", + ha="center", + va="top", + color=score_color, + transform=ax.transAxes, # pyright: 
ignore[reportAttributeAccessIssue] + ) + + plt.tight_layout() + path = "Label_drift.png" + plt.savefig(path, bbox_inches="tight") + plt.close() + + return TestSuiteReturnType( + metrics={}, properties={"_label_drift_score": drift_score}, tables=[], attachments=[path] + ) + + +def plot_correlation_matrix( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + + subset_columns = internal_parameters.get( + "subset_columns", [target_column] + [col for col in training_df.columns[:10] if col != "TARGET"] + ) + cmap = internal_parameters.get("cmap", "Blues") + + # Select subset of columns + training_df = training_df[subset_columns] + + # Calculate the correlation matrix + corr_matrix = training_df.corr() + + # Plot the correlation matrix + plt.figure(figsize=(10, 8)) + sns.heatmap(corr_matrix, annot=True, cmap=cmap, fmt=".2f", annot_kws={"fontsize": 12}, cbar=True) + plt.title("Correlation Matrix") + + # Save the plot + file_path = "Correlation_matrix_plot.png" + plt.savefig(file_path, bbox_inches="tight") + plt.close() + + return TestSuiteReturnType( + metrics={}, + properties={}, + tables=[], + attachments=[file_path], + ) + + +# custom test which can be used for dataset validation +def test_dataset_split( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, +) -> TestSuiteReturnType: + from vectice import Table + from vectice.models.validation import TestSuiteReturnType + + total_df = len(training_df) + len(testing_df) + + # Create a DataFrame with the results + datasplit_df = pd.DataFrame( + { + "Dataset": ["Train", "Test", "Total"], + "Size": [len(training_df), len(testing_df), total_df], + "Percentage": [ + (len(training_df) / total_df * 100), + (len(testing_df) / total_df * 100), + 100, + ], + } + ) + + table = Table(datasplit_df) + + return TestSuiteReturnType(metrics={}, properties={}, tables=[table], attachments=[]) + + +# custom test which can be used for dataset validation +def iqr_and_outliers( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, +) -> TestSuiteReturnType | None: + from vectice.models.validation import TestSuiteReturnType + + dataset = training_df + + files = [] + # disable plots showing + if internal_parameters.get("subset_columns") is not None: + columns = internal_parameters.get("subset_columns") + else: + columns = dataset.select_dtypes(include=[np.number]).columns[:10] + plt.ioff() + for column in columns: # type: ignore + file_name = f"iqr_and_outliers_{column}.png" + + temp_file_path = file_name + + Q1 = dataset[column].quantile(0.25) + Q3 = dataset[column].quantile(0.75) + IQR = Q3 - Q1 + lower_bound = Q1 - 1.5 * IQR + upper_bound = Q3 + 1.5 * IQR + + plt.figure(figsize=(10, 6)) + plt.hist(dataset[column], bins=20, edgecolor="k", alpha=0.7) + plt.axvline(Q1, color="r", linestyle="--", label=f"Q1 (25th percentile): {Q1:.2f}") + plt.axvline(Q3, color="b", linestyle="--", label=f"Q3 
(75th percentile): {Q3:.2f}") + plt.axvline( + dataset[column].median(), + color="g", + linestyle="-", + label=f"Median: {dataset[column].median():.2f}", + ) + plt.fill_betweenx([0, plt.ylim()[1]], Q1, Q3, color="gray", alpha=0.3, label=f"IQR: {IQR:.2f}") + + # Highlight outliers + outliers = dataset[(dataset[column] < lower_bound) | (dataset[column] > upper_bound)][column] + plt.scatter(outliers, [0] * len(outliers), color="red", label="Outliers", zorder=5) + + plt.title(f"Histogram with IQR and Outliers for {column}") + plt.xlabel(column) + plt.ylabel("Frequency") + plt.legend() + plt.savefig(temp_file_path, bbox_inches="tight") + files.append(temp_file_path) + + plt.ion() + return TestSuiteReturnType( + metrics={}, + properties={}, + tables=[], + attachments=files, + ) \ No newline at end of file diff --git a/24.2/samples/validation_suites/master_config_test_suites.py b/24.2/samples/validation_suites/master_config_test_suites.py index 0f13a6a..8211612 100644 --- a/24.2/samples/validation_suites/master_config_test_suites.py +++ b/24.2/samples/validation_suites/master_config_test_suites.py @@ -10,19 +10,19 @@ # custom data quality validation tests -from data_quality_modules import ( +from test_modules.data_quality_modules import ( test_dataset_split, iqr_and_outliers, ) # custom data privacy validation tests -from data_privacy_modules import ( +from test_modules.data_privacy_modules import ( sensitive_data_check, sensitive_data_type_check, pii_check, ) -from correlation_matrix_module import ( +from test_modules.correlation_matrix_module import ( plot_correlation_matrix ) diff --git a/24.2/samples/validation_suites/test_modules/correlation_matrix_module.py b/24.2/samples/validation_suites/test_modules/correlation_matrix_module.py new file mode 100644 index 0000000..83510ad --- /dev/null +++ b/24.2/samples/validation_suites/test_modules/correlation_matrix_module.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any, Dict + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns + + +if TYPE_CHECKING: + from matplotlib.container import BarContainer + from numpy import ndarray + from numpy.typing import ArrayLike + from pandas import DataFrame + + from vectice.models.validation import TestSuiteReturnType + +_logger = logging.getLogger(__name__) + +def plot_correlation_matrix( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + subset_columns = internal_parameters.get("subset_columns", [target_column] + [col for col in training_df.columns[:10] if col != "TARGET"]) + cmap = internal_parameters.get("cmap", "Blues") + + # Select subset of columns + training_df = training_df[subset_columns] + + # Calculate the correlation matrix + corr_matrix = training_df.corr() + + # Plot the correlation matrix + plt.figure(figsize=(10, 8)) + sns.heatmap(corr_matrix, annot=True, cmap=cmap, fmt=".2f", annot_kws={"fontsize": 12}, cbar=True) + plt.title("Correlation Matrix") + + # Save the plot + file_path = "Correlation_matrix_plot.png" + plt.savefig(file_path) + plt.close() + + return TestSuiteReturnType( + metrics={}, + properties={}, + tables=[], + attachments=[file_path], + ) \ No newline at end of file diff --git 
a/24.2/samples/validation_suites/test_modules/data_privacy_modules.py b/24.2/samples/validation_suites/test_modules/data_privacy_modules.py new file mode 100644 index 0000000..90d851f --- /dev/null +++ b/24.2/samples/validation_suites/test_modules/data_privacy_modules.py @@ -0,0 +1,148 @@ +# Write custom tests which can be used to validate your datasets security +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pandas as pd + +if TYPE_CHECKING: + from numpy.typing import ArrayLike + from pandas import DataFrame + + from vectice.models.validation_dataset import TestSuiteReturnType + + +def sensitive_data_check( + dataset: DataFrame | None = None, + training_df: DataFrame | None = None, + testing_df: DataFrame | None = None, + feature_columns: ArrayLike | list | None = None, + target_column: ArrayLike | str | None = None, + sensitive_keywords: list | None = None, +) -> TestSuiteReturnType | None: + from vectice import Table + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None or sensitive_keywords is None: + return None + + # Initialize a dictionary to hold counts of sensitive data + sensitive_counts = {keyword: 0 for keyword in sensitive_keywords} + + # Check each cell in the DataFrame for sensitive keywords + for keyword in sensitive_keywords: + sensitive_counts[keyword] = dataset.apply( + lambda x: x.astype(str).str.contains(keyword, case=False).sum() + ).sum() + + # Create a DataFrame with the results + sensitive_counts_df = pd.DataFrame( + { + "Sensitive Keyword": list(sensitive_counts.keys()), + "Count": list(sensitive_counts.values()), + } + ) + + table = Table(sensitive_counts_df) + + return TestSuiteReturnType( + properties={}, + tables=[table], + attachments=[], + ) + + +def pii_check( + dataset: DataFrame | None = None, + training_df: DataFrame | None = None, + testing_df: DataFrame | None = None, + feature_columns: ArrayLike | list | None = None, + target_column: ArrayLike | str | None = None, +) -> TestSuiteReturnType | None: + from vectice import Table + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None: + return None + + # Define common PII patterns + pii_patterns = { + "name": r"\b[A-Z][a-z]*\b", + "email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b", + "phone": r"\b(\+?[\d]{1,3}[-.\s]?[\d]{1,4}[-.\s]?[\d]{1,4}[-.\s]?[\d]{1,9})\b", + } + + # Initialize a dictionary to hold counts of PII matches + pii_counts = {key: 0 for key in pii_patterns.keys()} + + # Check each column in the DataFrame for PII patterns + for column in dataset.columns: + for key, pattern in pii_patterns.items(): + pii_counts[key] += ( + dataset[column] + .astype(str) + .str.contains(pattern, case=False, regex=True) + .sum() + ) + + # Create a DataFrame with the results + pii_counts_df = pd.DataFrame( + {"PII Type": list(pii_counts.keys()), "Count": list(pii_counts.values())} + ) + + table = Table(pii_counts_df) + + return TestSuiteReturnType( + properties={}, + tables=[table], + attachments=[], + ) + + +def sensitive_data_type_check( + dataset: DataFrame | None = None, + training_df: DataFrame | None = None, + testing_df: DataFrame | None = None, + feature_columns: ArrayLike | list | None = None, + target_column: ArrayLike | str | None = None, +) -> TestSuiteReturnType | None: + from vectice import Table + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None: + return None + + # Define patterns for sensitive data types + sensitive_data_patterns = { + 
"credit_card": r"\b(?:\d[ -]*?){13,16}\b", + "ssn": r"\b\d{3}-\d{2}-\d{4}\b", + } + + # Initialize a dictionary to hold counts of sensitive data type matches + sensitive_data_counts = {key: 0 for key in sensitive_data_patterns.keys()} + + # Check each column in the DataFrame for sensitive data type patterns + for column in dataset.columns: + for key, pattern in sensitive_data_patterns.items(): + sensitive_data_counts[key] += ( + dataset[column] + .astype(str) + .str.contains(pattern, case=False, regex=True) + .sum() + ) + + # Create a DataFrame with the results + sensitive_data_counts_df = pd.DataFrame( + { + "Sensitive Data Type": list(sensitive_data_counts.keys()), + "Count": list(sensitive_data_counts.values()), + } + ) + + table = Table(sensitive_data_counts_df) + + return TestSuiteReturnType( + properties={}, + tables=[table], + attachments=[], + ) diff --git a/24.2/samples/validation_suites/test_modules/data_quality_modules.py b/24.2/samples/validation_suites/test_modules/data_quality_modules.py new file mode 100644 index 0000000..05b3ae5 --- /dev/null +++ b/24.2/samples/validation_suites/test_modules/data_quality_modules.py @@ -0,0 +1,116 @@ +# Write custom tests which can be used to validate your datasets quality +from __future__ import annotations + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from pandas import DataFrame +from vectice.models.validation_dataset import TestSuiteReturnType + + +# custom test which can be used for dataset validation +def test_dataset_split( + dataset: DataFrame | None, + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + feature_columns: list | None = None, + threshold: float | None = None, +) -> TestSuiteReturnType: + from vectice import Table + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None: + return None + + total_df = len(training_df) + len(testing_df) + + # Create a DataFrame with the results + datasplit_df = pd.DataFrame( + { + "Dataset": ["Train", "Test", "Total"], + "Size": [len(training_df), len(testing_df), total_df], + "Percentage": [ + (len(training_df) / total_df * 100), + (len(testing_df) / total_df * 100), + 100, + ], + } + ) + + table = Table(datasplit_df) + + return TestSuiteReturnType( + properties={}, + tables=[table], + attachments=[], + ) + + +# custom test which can be used for dataset validation +def iqr_and_outliers( + dataset: DataFrame | None = None, + training_df: DataFrame | None = None, + testing_df: DataFrame | None = None, + feature_columns: list | None = None, + target_column: str | None = None, + threshold: float | None = None, +) -> TestSuiteReturnType | None: + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None: + return None + + files = [] + # disable plots showing + plt.ioff() + for column in dataset.select_dtypes(include=[np.number]).columns: + file_name = f"iqr_and_outliers_{column}.png" + + temp_file_path = file_name + + Q1 = dataset[column].quantile(0.25) + Q3 = dataset[column].quantile(0.75) + IQR = Q3 - Q1 + lower_bound = Q1 - 1.5 * IQR + upper_bound = Q3 + 1.5 * IQR + + plt.figure(figsize=(10, 6)) + plt.hist(dataset[column], bins=20, edgecolor="k", alpha=0.7) + plt.axvline( + Q1, color="r", linestyle="--", label=f"Q1 (25th percentile): {Q1:.2f}" + ) + plt.axvline( + Q3, color="b", linestyle="--", label=f"Q3 (75th percentile): {Q3:.2f}" + ) + plt.axvline( + dataset[column].median(), + color="g", + linestyle="-", + label=f"Median: {dataset[column].median():.2f}", + ) + 
plt.fill_betweenx( + [0, plt.ylim()[1]], Q1, Q3, color="gray", alpha=0.3, label=f"IQR: {IQR:.2f}" + ) + + # Highlight outliers + outliers = dataset[ + (dataset[column] < lower_bound) | (dataset[column] > upper_bound) + ][column] + plt.scatter( + outliers, [0] * len(outliers), color="red", label="Outliers", zorder=5 + ) + + plt.title(f"Histogram with IQR and Outliers for {column}") + plt.xlabel(column) + plt.ylabel("Frequency") + plt.legend() + plt.savefig(temp_file_path, bbox_inches="tight") + files.append(temp_file_path) + + plt.ion() + return TestSuiteReturnType( + properties={}, + tables=[], + attachments=files, + ) From e7b4b46973a5dfcd73fe8e481d09dab4a53df222 Mon Sep 17 00:00:00 2001 From: BDaversa <130710586+BDaversa@users.noreply.github.com> Date: Thu, 11 Jul 2024 13:05:48 -0700 Subject: [PATCH 10/20] Update README.md --- 24.2/samples/validation_suites/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/24.2/samples/validation_suites/README.md b/24.2/samples/validation_suites/README.md index ab6fba0..afec820 100644 --- a/24.2/samples/validation_suites/README.md +++ b/24.2/samples/validation_suites/README.md @@ -7,19 +7,19 @@ | | Feature Importance | `feature_importance` | | | Label Drift | `label_drift` | | | Prediction Drift | `prediction_drift` | -| | **Full Binary Classification Test** | `plot_roc_curve`, `conf_matrix`, `explainability`, `feature_importance`, `label_drift`, `prediction_drift` | +| | **Binary Classification suite** | `plot_roc_curve`, `conf_matrix`, `explainability`, `feature_importance`, `label_drift`, `prediction_drift` | | **Data Privacy Tests** | Sensitive Data Check | `sensitive_data_check` | | | PII Check | `pii_check` | | | Sensitive Data Type Check | `sensitive_data_type_check` | -| | **Full Data Privacy Test** | `sensitive_data_check`, `pii_check`, `sensitive_data_type_check` | +| | **Data Privacy suite** | `sensitive_data_check`, `pii_check`, `sensitive_data_type_check` | | **Data Quality Tests** | Dataset Split Validation | `test_dataset_split` | | | IQR and Outliers | `iqr_and_outliers` | -| | **Full Dataset Quality suiteest** | `test_dataset_split`, `iqr_and_outliers` | +| | **Dataset Quality suite** | `test_dataset_split`, `iqr_and_outliers` | | **Regression Tests** | Residuals Plot | `plot_residuals` | | | R² Score | `r2_score` | | | Explainability | `explainability` | | | Feature Importance | `feature_importance` | | | Target Drift | `target_drift` | | | Prediction Drift | `prediction_drift` | -| | **Full Regression suite** | `plot_residuals`, `r2_score`, `explainability`, `feature_importance`, `target_drift`, `prediction_drift` | +| | **Regression suite** | `plot_residuals`, `r2_score`, `explainability`, `feature_importance`, `target_drift`, `prediction_drift` | From b6b672cfba62fe6387126e76aedaf27bdd745aab Mon Sep 17 00:00:00 2001 From: BDaversa <130710586+BDaversa@users.noreply.github.com> Date: Thu, 11 Jul 2024 14:12:43 -0700 Subject: [PATCH 11/20] Update README.md --- 24.2/samples/validation_suites/README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/24.2/samples/validation_suites/README.md b/24.2/samples/validation_suites/README.md index afec820..20584b1 100644 --- a/24.2/samples/validation_suites/README.md +++ b/24.2/samples/validation_suites/README.md @@ -7,11 +7,13 @@ | | Feature Importance | `feature_importance` | | | Label Drift | `label_drift` | | | Prediction Drift | `prediction_drift` | +| | Recall by class | `recall_by_class ` | +| | Precision by class | `precision_by_class ` 
| | | **Binary Classification suite** | `plot_roc_curve`, `conf_matrix`, `explainability`, `feature_importance`, `label_drift`, `prediction_drift` | +| | **Multiclass Classification suite** | `plot_roc_curve`, `conf_matrix`, `explainability`, `feature_importance`, `label_drift`, `prediction_drift`, `recall_by_class `, `precision_by_class ` | | **Data Privacy Tests** | Sensitive Data Check | `sensitive_data_check` | | | PII Check | `pii_check` | | | Sensitive Data Type Check | `sensitive_data_type_check` | -| | **Data Privacy suite** | `sensitive_data_check`, `pii_check`, `sensitive_data_type_check` | | **Data Quality Tests** | Dataset Split Validation | `test_dataset_split` | | | IQR and Outliers | `iqr_and_outliers` | | | **Dataset Quality suite** | `test_dataset_split`, `iqr_and_outliers` | @@ -23,3 +25,4 @@ | | Prediction Drift | `prediction_drift` | | | **Regression suite** | `plot_residuals`, `r2_score`, `explainability`, `feature_importance`, `target_drift`, `prediction_drift` | + From 91072e56c18314bad08b26c49711709943093ba7 Mon Sep 17 00:00:00 2001 From: BDaversa <130710586+BDaversa@users.noreply.github.com> Date: Thu, 11 Jul 2024 14:14:13 -0700 Subject: [PATCH 12/20] Update README.md --- 24.2/samples/validation_suites/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/24.2/samples/validation_suites/README.md b/24.2/samples/validation_suites/README.md index 20584b1..34f5df3 100644 --- a/24.2/samples/validation_suites/README.md +++ b/24.2/samples/validation_suites/README.md @@ -1,4 +1,4 @@ -## All test modules below can be added to your test suite to by run on any models or datasets +## All tests and suites below can be added to your test suite to by run on any models or datasets and available inside Vectice default validation Library | **Category** | **Test Name** | **Function** | |------------------------------|----------------------------------|--------------------------------------| | **Classification Tests** | ROC Curve | `plot_roc_curve` | From 5b6027e08be62a9fe399fbdc7a1536c0e4a284b2 Mon Sep 17 00:00:00 2001 From: BDaversa <130710586+BDaversa@users.noreply.github.com> Date: Thu, 11 Jul 2024 14:41:42 -0700 Subject: [PATCH 13/20] Update README.md --- 24.2/samples/validation_suites/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/24.2/samples/validation_suites/README.md b/24.2/samples/validation_suites/README.md index 34f5df3..1dd831e 100644 --- a/24.2/samples/validation_suites/README.md +++ b/24.2/samples/validation_suites/README.md @@ -1,4 +1,4 @@ -## All tests and suites below can be added to your test suite to by run on any models or datasets and available inside Vectice default validation Library +## List of default validation tests provided by Vectice with source code from PiML | **Category** | **Test Name** | **Function** | |------------------------------|----------------------------------|--------------------------------------| | **Classification Tests** | ROC Curve | `plot_roc_curve` | From 8c72c49fba7445aaf80b46de3c4f6cd7b0cae04e Mon Sep 17 00:00:00 2001 From: BDaversa <130710586+BDaversa@users.noreply.github.com> Date: Thu, 11 Jul 2024 14:42:13 -0700 Subject: [PATCH 14/20] Update README.md --- 24.2/samples/validation_suites/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/24.2/samples/validation_suites/README.md b/24.2/samples/validation_suites/README.md index 1dd831e..6822cdc 100644 --- a/24.2/samples/validation_suites/README.md +++ b/24.2/samples/validation_suites/README.md @@ -1,4 +1,4 
@@ -## List of default validation tests provided by Vectice with source code from PiML +## List of validation tests provided by Vectice with source code from PiML | **Category** | **Test Name** | **Function** | |------------------------------|----------------------------------|--------------------------------------| | **Classification Tests** | ROC Curve | `plot_roc_curve` | From 0702506d3d472dafb1feac2545faed12ad383ebf Mon Sep 17 00:00:00 2001 From: BDaversa <130710586+BDaversa@users.noreply.github.com> Date: Thu, 11 Jul 2024 14:42:33 -0700 Subject: [PATCH 15/20] Update README.md --- 24.2/samples/validation_suites/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/24.2/samples/validation_suites/README.md b/24.2/samples/validation_suites/README.md index 6822cdc..349c78f 100644 --- a/24.2/samples/validation_suites/README.md +++ b/24.2/samples/validation_suites/README.md @@ -1,4 +1,4 @@ -## List of validation tests provided by Vectice with source code from PiML +## List of validation tests provided by Vectice (source code from PiML) | **Category** | **Test Name** | **Function** | |------------------------------|----------------------------------|--------------------------------------| | **Classification Tests** | ROC Curve | `plot_roc_curve` | From de22e95b4e96226874c684c25aef6d4687ae4e5d Mon Sep 17 00:00:00 2001 From: BDaversa Date: Thu, 11 Jul 2024 14:48:47 -0700 Subject: [PATCH 16/20] reshape --- .../README.md | 0 .../master_config_test_suites.py | 0 .../correlation_matrix_module.py | 0 .../test_modules}/data_privacy_modules.py | 0 .../test_modules}/data_quality_modules.py | 0 .../test_modules/default_tests_vectice.py} | 0 .../test_modules/correlation_matrix_module.py | 56 ------- .../test_modules/data_privacy_modules.py | 148 ------------------ .../test_modules/data_quality_modules.py | 116 -------------- 9 files changed, 320 deletions(-) rename 24.2/samples/{validation_suites => test_suites_config}/README.md (100%) rename 24.2/samples/{validation_suites => test_suites_config}/master_config_test_suites.py (100%) rename 24.2/samples/{validation_suites => test_suites_config/test_modules}/correlation_matrix_module.py (100%) rename 24.2/samples/{validation_suites => test_suites_config/test_modules}/data_privacy_modules.py (100%) rename 24.2/samples/{validation_suites => test_suites_config/test_modules}/data_quality_modules.py (100%) rename 24.2/samples/{validation_suites/PiML_wrapper.py => test_suites_config/test_modules/default_tests_vectice.py} (100%) delete mode 100644 24.2/samples/validation_suites/test_modules/correlation_matrix_module.py delete mode 100644 24.2/samples/validation_suites/test_modules/data_privacy_modules.py delete mode 100644 24.2/samples/validation_suites/test_modules/data_quality_modules.py diff --git a/24.2/samples/validation_suites/README.md b/24.2/samples/test_suites_config/README.md similarity index 100% rename from 24.2/samples/validation_suites/README.md rename to 24.2/samples/test_suites_config/README.md diff --git a/24.2/samples/validation_suites/master_config_test_suites.py b/24.2/samples/test_suites_config/master_config_test_suites.py similarity index 100% rename from 24.2/samples/validation_suites/master_config_test_suites.py rename to 24.2/samples/test_suites_config/master_config_test_suites.py diff --git a/24.2/samples/validation_suites/correlation_matrix_module.py b/24.2/samples/test_suites_config/test_modules/correlation_matrix_module.py similarity index 100% rename from 
24.2/samples/validation_suites/correlation_matrix_module.py rename to 24.2/samples/test_suites_config/test_modules/correlation_matrix_module.py diff --git a/24.2/samples/validation_suites/data_privacy_modules.py b/24.2/samples/test_suites_config/test_modules/data_privacy_modules.py similarity index 100% rename from 24.2/samples/validation_suites/data_privacy_modules.py rename to 24.2/samples/test_suites_config/test_modules/data_privacy_modules.py diff --git a/24.2/samples/validation_suites/data_quality_modules.py b/24.2/samples/test_suites_config/test_modules/data_quality_modules.py similarity index 100% rename from 24.2/samples/validation_suites/data_quality_modules.py rename to 24.2/samples/test_suites_config/test_modules/data_quality_modules.py diff --git a/24.2/samples/validation_suites/PiML_wrapper.py b/24.2/samples/test_suites_config/test_modules/default_tests_vectice.py similarity index 100% rename from 24.2/samples/validation_suites/PiML_wrapper.py rename to 24.2/samples/test_suites_config/test_modules/default_tests_vectice.py diff --git a/24.2/samples/validation_suites/test_modules/correlation_matrix_module.py b/24.2/samples/validation_suites/test_modules/correlation_matrix_module.py deleted file mode 100644 index 83510ad..0000000 --- a/24.2/samples/validation_suites/test_modules/correlation_matrix_module.py +++ /dev/null @@ -1,56 +0,0 @@ -from __future__ import annotations - -import logging -from typing import TYPE_CHECKING, Any, Dict - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sns - - -if TYPE_CHECKING: - from matplotlib.container import BarContainer - from numpy import ndarray - from numpy.typing import ArrayLike - from pandas import DataFrame - - from vectice.models.validation import TestSuiteReturnType - -_logger = logging.getLogger(__name__) - -def plot_correlation_matrix( - training_df: DataFrame, - testing_df: DataFrame, - target_column: str, - predictor: Any, - predict_proba_train: ArrayLike | None, - predict_proba_test: ArrayLike | None, - internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, -) -> TestSuiteReturnType: - from vectice.models.validation import TestSuiteReturnType - subset_columns = internal_parameters.get("subset_columns", [target_column] + [col for col in training_df.columns[:10] if col != "TARGET"]) - cmap = internal_parameters.get("cmap", "Blues") - - # Select subset of columns - training_df = training_df[subset_columns] - - # Calculate the correlation matrix - corr_matrix = training_df.corr() - - # Plot the correlation matrix - plt.figure(figsize=(10, 8)) - sns.heatmap(corr_matrix, annot=True, cmap=cmap, fmt=".2f", annot_kws={"fontsize": 12}, cbar=True) - plt.title("Correlation Matrix") - - # Save the plot - file_path = "Correlation_matrix_plot.png" - plt.savefig(file_path) - plt.close() - - return TestSuiteReturnType( - metrics={}, - properties={}, - tables=[], - attachments=[file_path], - ) \ No newline at end of file diff --git a/24.2/samples/validation_suites/test_modules/data_privacy_modules.py b/24.2/samples/validation_suites/test_modules/data_privacy_modules.py deleted file mode 100644 index 90d851f..0000000 --- a/24.2/samples/validation_suites/test_modules/data_privacy_modules.py +++ /dev/null @@ -1,148 +0,0 @@ -# Write custom tests which can be used to validate your datasets security -from __future__ import annotations - -from typing import TYPE_CHECKING - -import pandas as pd - -if TYPE_CHECKING: - from numpy.typing import ArrayLike - from pandas import DataFrame - 
- from vectice.models.validation_dataset import TestSuiteReturnType - - -def sensitive_data_check( - dataset: DataFrame | None = None, - training_df: DataFrame | None = None, - testing_df: DataFrame | None = None, - feature_columns: ArrayLike | list | None = None, - target_column: ArrayLike | str | None = None, - sensitive_keywords: list | None = None, -) -> TestSuiteReturnType | None: - from vectice import Table - from vectice.models.validation_dataset import TestSuiteReturnType - - if dataset is None or sensitive_keywords is None: - return None - - # Initialize a dictionary to hold counts of sensitive data - sensitive_counts = {keyword: 0 for keyword in sensitive_keywords} - - # Check each cell in the DataFrame for sensitive keywords - for keyword in sensitive_keywords: - sensitive_counts[keyword] = dataset.apply( - lambda x: x.astype(str).str.contains(keyword, case=False).sum() - ).sum() - - # Create a DataFrame with the results - sensitive_counts_df = pd.DataFrame( - { - "Sensitive Keyword": list(sensitive_counts.keys()), - "Count": list(sensitive_counts.values()), - } - ) - - table = Table(sensitive_counts_df) - - return TestSuiteReturnType( - properties={}, - tables=[table], - attachments=[], - ) - - -def pii_check( - dataset: DataFrame | None = None, - training_df: DataFrame | None = None, - testing_df: DataFrame | None = None, - feature_columns: ArrayLike | list | None = None, - target_column: ArrayLike | str | None = None, -) -> TestSuiteReturnType | None: - from vectice import Table - from vectice.models.validation_dataset import TestSuiteReturnType - - if dataset is None: - return None - - # Define common PII patterns - pii_patterns = { - "name": r"\b[A-Z][a-z]*\b", - "email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b", - "phone": r"\b(\+?[\d]{1,3}[-.\s]?[\d]{1,4}[-.\s]?[\d]{1,4}[-.\s]?[\d]{1,9})\b", - } - - # Initialize a dictionary to hold counts of PII matches - pii_counts = {key: 0 for key in pii_patterns.keys()} - - # Check each column in the DataFrame for PII patterns - for column in dataset.columns: - for key, pattern in pii_patterns.items(): - pii_counts[key] += ( - dataset[column] - .astype(str) - .str.contains(pattern, case=False, regex=True) - .sum() - ) - - # Create a DataFrame with the results - pii_counts_df = pd.DataFrame( - {"PII Type": list(pii_counts.keys()), "Count": list(pii_counts.values())} - ) - - table = Table(pii_counts_df) - - return TestSuiteReturnType( - properties={}, - tables=[table], - attachments=[], - ) - - -def sensitive_data_type_check( - dataset: DataFrame | None = None, - training_df: DataFrame | None = None, - testing_df: DataFrame | None = None, - feature_columns: ArrayLike | list | None = None, - target_column: ArrayLike | str | None = None, -) -> TestSuiteReturnType | None: - from vectice import Table - from vectice.models.validation_dataset import TestSuiteReturnType - - if dataset is None: - return None - - # Define patterns for sensitive data types - sensitive_data_patterns = { - "credit_card": r"\b(?:\d[ -]*?){13,16}\b", - "ssn": r"\b\d{3}-\d{2}-\d{4}\b", - } - - # Initialize a dictionary to hold counts of sensitive data type matches - sensitive_data_counts = {key: 0 for key in sensitive_data_patterns.keys()} - - # Check each column in the DataFrame for sensitive data type patterns - for column in dataset.columns: - for key, pattern in sensitive_data_patterns.items(): - sensitive_data_counts[key] += ( - dataset[column] - .astype(str) - .str.contains(pattern, case=False, regex=True) - .sum() - ) - - # Create a DataFrame 
with the results - sensitive_data_counts_df = pd.DataFrame( - { - "Sensitive Data Type": list(sensitive_data_counts.keys()), - "Count": list(sensitive_data_counts.values()), - } - ) - - table = Table(sensitive_data_counts_df) - - return TestSuiteReturnType( - properties={}, - tables=[table], - attachments=[], - ) diff --git a/24.2/samples/validation_suites/test_modules/data_quality_modules.py b/24.2/samples/validation_suites/test_modules/data_quality_modules.py deleted file mode 100644 index 05b3ae5..0000000 --- a/24.2/samples/validation_suites/test_modules/data_quality_modules.py +++ /dev/null @@ -1,116 +0,0 @@ -# Write custom tests which can be used to validate your datasets quality -from __future__ import annotations - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from pandas import DataFrame -from vectice.models.validation_dataset import TestSuiteReturnType - - -# custom test which can be used for dataset validation -def test_dataset_split( - dataset: DataFrame | None, - training_df: DataFrame, - testing_df: DataFrame, - target_column: str, - feature_columns: list | None = None, - threshold: float | None = None, -) -> TestSuiteReturnType: - from vectice import Table - from vectice.models.validation_dataset import TestSuiteReturnType - - if dataset is None: - return None - - total_df = len(training_df) + len(testing_df) - - # Create a DataFrame with the results - datasplit_df = pd.DataFrame( - { - "Dataset": ["Train", "Test", "Total"], - "Size": [len(training_df), len(testing_df), total_df], - "Percentage": [ - (len(training_df) / total_df * 100), - (len(testing_df) / total_df * 100), - 100, - ], - } - ) - - table = Table(datasplit_df) - - return TestSuiteReturnType( - properties={}, - tables=[table], - attachments=[], - ) - - -# custom test which can be used for dataset validation -def iqr_and_outliers( - dataset: DataFrame | None = None, - training_df: DataFrame | None = None, - testing_df: DataFrame | None = None, - feature_columns: list | None = None, - target_column: str | None = None, - threshold: float | None = None, -) -> TestSuiteReturnType | None: - from vectice.models.validation_dataset import TestSuiteReturnType - - if dataset is None: - return None - - files = [] - # disable plots showing - plt.ioff() - for column in dataset.select_dtypes(include=[np.number]).columns: - file_name = f"iqr_and_outliers_{column}.png" - - temp_file_path = file_name - - Q1 = dataset[column].quantile(0.25) - Q3 = dataset[column].quantile(0.75) - IQR = Q3 - Q1 - lower_bound = Q1 - 1.5 * IQR - upper_bound = Q3 + 1.5 * IQR - - plt.figure(figsize=(10, 6)) - plt.hist(dataset[column], bins=20, edgecolor="k", alpha=0.7) - plt.axvline( - Q1, color="r", linestyle="--", label=f"Q1 (25th percentile): {Q1:.2f}" - ) - plt.axvline( - Q3, color="b", linestyle="--", label=f"Q3 (75th percentile): {Q3:.2f}" - ) - plt.axvline( - dataset[column].median(), - color="g", - linestyle="-", - label=f"Median: {dataset[column].median():.2f}", - ) - plt.fill_betweenx( - [0, plt.ylim()[1]], Q1, Q3, color="gray", alpha=0.3, label=f"IQR: {IQR:.2f}" - ) - - # Highlight outliers - outliers = dataset[ - (dataset[column] < lower_bound) | (dataset[column] > upper_bound) - ][column] - plt.scatter( - outliers, [0] * len(outliers), color="red", label="Outliers", zorder=5 - ) - - plt.title(f"Histogram with IQR and Outliers for {column}") - plt.xlabel(column) - plt.ylabel("Frequency") - plt.legend() - plt.savefig(temp_file_path, bbox_inches="tight") - files.append(temp_file_path) - - plt.ion() - return 
TestSuiteReturnType( - properties={}, - tables=[], - attachments=files, - ) From d5c7c8ea3d71590951320c29247e5cf9fe48c40d Mon Sep 17 00:00:00 2001 From: BDaversa Date: Thu, 11 Jul 2024 15:53:12 -0700 Subject: [PATCH 17/20] rephrase suite --- .../test_suites_config/master_config_test_suites.py | 9 +++++---- .../test_modules/correlation_matrix_module.py | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/24.2/samples/test_suites_config/master_config_test_suites.py b/24.2/samples/test_suites_config/master_config_test_suites.py index 8211612..24a5005 100644 --- a/24.2/samples/test_suites_config/master_config_test_suites.py +++ b/24.2/samples/test_suites_config/master_config_test_suites.py @@ -27,7 +27,7 @@ ) -# The master test suite file is used to map all suite of test which can be run. +# The master test suite file is used to map all ADDITIONAL suite of test which can be run. # The tests can be provided by Vectice or custom functions from your modules. # Vectice uses this configuration to simply identify and bundle available tests into suite, when you run # your validations in your notebook. @@ -42,13 +42,14 @@ label_drift, prediction_drift, ], - "data_quality": [ + "data_quality_ext": [ test_dataset_split, iqr_and_outliers, + plot_correlation_matrix, ], - "custom":[ + "corr_matrix_ext": [ plot_correlation_matrix, - ] + ], } # Map the tests to be used for data privacy validation diff --git a/24.2/samples/test_suites_config/test_modules/correlation_matrix_module.py b/24.2/samples/test_suites_config/test_modules/correlation_matrix_module.py index 83510ad..3935f9a 100644 --- a/24.2/samples/test_suites_config/test_modules/correlation_matrix_module.py +++ b/24.2/samples/test_suites_config/test_modules/correlation_matrix_module.py @@ -29,6 +29,7 @@ def plot_correlation_matrix( internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, ) -> TestSuiteReturnType: from vectice.models.validation import TestSuiteReturnType + subset_columns = internal_parameters.get("subset_columns", [target_column] + [col for col in training_df.columns[:10] if col != "TARGET"]) cmap = internal_parameters.get("cmap", "Blues") @@ -48,6 +49,7 @@ def plot_correlation_matrix( plt.savefig(file_path) plt.close() + # RETURN IN THE VECTICE EXPECTED FORMART return TestSuiteReturnType( metrics={}, properties={}, From 8b500550bbec3c9a6134eeea6cff49f8134bfce2 Mon Sep 17 00:00:00 2001 From: BDaversa Date: Thu, 11 Jul 2024 17:36:22 -0700 Subject: [PATCH 18/20] reorganize folder --- Validation test/test_suites_config/README.md | 28 + .../master_config_test_suites.py | 65 +++ .../test_modules/correlation_matrix_module.py | 58 ++ .../test_modules/data_privacy_modules.py | 148 +++++ .../test_modules/data_quality_modules.py | 116 ++++ .../test_modules/default_tests_vectice.py | 523 ++++++++++++++++++ 6 files changed, 938 insertions(+) create mode 100644 Validation test/test_suites_config/README.md create mode 100644 Validation test/test_suites_config/master_config_test_suites.py create mode 100644 Validation test/test_suites_config/test_modules/correlation_matrix_module.py create mode 100644 Validation test/test_suites_config/test_modules/data_privacy_modules.py create mode 100644 Validation test/test_suites_config/test_modules/data_quality_modules.py create mode 100644 Validation test/test_suites_config/test_modules/default_tests_vectice.py diff --git a/Validation test/test_suites_config/README.md b/Validation test/test_suites_config/README.md new file mode 100644 index 0000000..349c78f --- /dev/null 
+++ b/Validation test/test_suites_config/README.md @@ -0,0 +1,28 @@ +## List of validation tests provided by Vectice (source code from PiML) +| **Category** | **Test Name** | **Function** | +|------------------------------|----------------------------------|--------------------------------------| +| **Classification Tests** | ROC Curve | `plot_roc_curve` | +| | Confusion Matrix | `conf_matrix` | +| | Explainability | `explainability` | +| | Feature Importance | `feature_importance` | +| | Label Drift | `label_drift` | +| | Prediction Drift | `prediction_drift` | +| | Recall by class | `recall_by_class ` | +| | Precision by class | `precision_by_class ` | +| | **Binary Classification suite** | `plot_roc_curve`, `conf_matrix`, `explainability`, `feature_importance`, `label_drift`, `prediction_drift` | +| | **Multiclass Classification suite** | `plot_roc_curve`, `conf_matrix`, `explainability`, `feature_importance`, `label_drift`, `prediction_drift`, `recall_by_class `, `precision_by_class ` | +| **Data Privacy Tests** | Sensitive Data Check | `sensitive_data_check` | +| | PII Check | `pii_check` | +| | Sensitive Data Type Check | `sensitive_data_type_check` | +| **Data Quality Tests** | Dataset Split Validation | `test_dataset_split` | +| | IQR and Outliers | `iqr_and_outliers` | +| | **Dataset Quality suite** | `test_dataset_split`, `iqr_and_outliers` | +| **Regression Tests** | Residuals Plot | `plot_residuals` | +| | R² Score | `r2_score` | +| | Explainability | `explainability` | +| | Feature Importance | `feature_importance` | +| | Target Drift | `target_drift` | +| | Prediction Drift | `prediction_drift` | +| | **Regression suite** | `plot_residuals`, `r2_score`, `explainability`, `feature_importance`, `target_drift`, `prediction_drift` | + + diff --git a/Validation test/test_suites_config/master_config_test_suites.py b/Validation test/test_suites_config/master_config_test_suites.py new file mode 100644 index 0000000..24a5005 --- /dev/null +++ b/Validation test/test_suites_config/master_config_test_suites.py @@ -0,0 +1,65 @@ +# import the Vectice provided probability of default validation tests +from vectice.models.test_library.binary_classification_test import ( + plot_roc_curve, + conf_matrix, + explainability, + feature_importance, + label_drift, + prediction_drift, +) + + +# custom data quality validation tests +from test_modules.data_quality_modules import ( + test_dataset_split, + iqr_and_outliers, +) + +# custom data privacy validation tests +from test_modules.data_privacy_modules import ( + sensitive_data_check, + sensitive_data_type_check, + pii_check, +) + +from test_modules.correlation_matrix_module import ( + plot_correlation_matrix +) + + +# The master test suite file is used to map all ADDITIONAL suite of test which can be run. +# The tests can be provided by Vectice or custom functions from your modules. +# Vectice uses this configuration to simply identify and bundle available tests into suite, when you run +# your validations in your notebook. 
+ +# Accumulation and mapping of all validation tests to be run for the PD model suite +PD_model_suite= { + "binary_suite": [ + plot_roc_curve, + conf_matrix, + explainability, + feature_importance, + label_drift, + prediction_drift, + ], + "data_quality_ext": [ + test_dataset_split, + iqr_and_outliers, + plot_correlation_matrix, + ], + "corr_matrix_ext": [ + plot_correlation_matrix, + ], +} + +# Map the tests to be used for data privacy validation +Robustness_suite = { + "sensitive_data_check": sensitive_data_check, + "pii_check": pii_check, + "sensitive_data_type_check": sensitive_data_type_check, + "data_privacy_full_suite": [ + sensitive_data_check, + pii_check, + sensitive_data_type_check, + ], +} diff --git a/Validation test/test_suites_config/test_modules/correlation_matrix_module.py b/Validation test/test_suites_config/test_modules/correlation_matrix_module.py new file mode 100644 index 0000000..3935f9a --- /dev/null +++ b/Validation test/test_suites_config/test_modules/correlation_matrix_module.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any, Dict + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns + + +if TYPE_CHECKING: + from matplotlib.container import BarContainer + from numpy import ndarray + from numpy.typing import ArrayLike + from pandas import DataFrame + + from vectice.models.validation import TestSuiteReturnType + +_logger = logging.getLogger(__name__) + +def plot_correlation_matrix( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + + subset_columns = internal_parameters.get("subset_columns", [target_column] + [col for col in training_df.columns[:10] if col != "TARGET"]) + cmap = internal_parameters.get("cmap", "Blues") + + # Select subset of columns + training_df = training_df[subset_columns] + + # Calculate the correlation matrix + corr_matrix = training_df.corr() + + # Plot the correlation matrix + plt.figure(figsize=(10, 8)) + sns.heatmap(corr_matrix, annot=True, cmap=cmap, fmt=".2f", annot_kws={"fontsize": 12}, cbar=True) + plt.title("Correlation Matrix") + + # Save the plot + file_path = "Correlation_matrix_plot.png" + plt.savefig(file_path) + plt.close() + + # RETURN IN THE VECTICE EXPECTED FORMART + return TestSuiteReturnType( + metrics={}, + properties={}, + tables=[], + attachments=[file_path], + ) \ No newline at end of file diff --git a/Validation test/test_suites_config/test_modules/data_privacy_modules.py b/Validation test/test_suites_config/test_modules/data_privacy_modules.py new file mode 100644 index 0000000..90d851f --- /dev/null +++ b/Validation test/test_suites_config/test_modules/data_privacy_modules.py @@ -0,0 +1,148 @@ +# Write custom tests which can be used to validate your datasets security +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pandas as pd + +if TYPE_CHECKING: + from numpy.typing import ArrayLike + from pandas import DataFrame + + from vectice.models.validation_dataset import TestSuiteReturnType + + +def sensitive_data_check( + dataset: DataFrame | None = None, + training_df: DataFrame | None = None, + testing_df: DataFrame | None = None, + feature_columns: ArrayLike | list | None = None, + 
target_column: ArrayLike | str | None = None, + sensitive_keywords: list | None = None, +) -> TestSuiteReturnType | None: + from vectice import Table + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None or sensitive_keywords is None: + return None + + # Initialize a dictionary to hold counts of sensitive data + sensitive_counts = {keyword: 0 for keyword in sensitive_keywords} + + # Check each cell in the DataFrame for sensitive keywords + for keyword in sensitive_keywords: + sensitive_counts[keyword] = dataset.apply( + lambda x: x.astype(str).str.contains(keyword, case=False).sum() + ).sum() + + # Create a DataFrame with the results + sensitive_counts_df = pd.DataFrame( + { + "Sensitive Keyword": list(sensitive_counts.keys()), + "Count": list(sensitive_counts.values()), + } + ) + + table = Table(sensitive_counts_df) + + return TestSuiteReturnType( + properties={}, + tables=[table], + attachments=[], + ) + + +def pii_check( + dataset: DataFrame | None = None, + training_df: DataFrame | None = None, + testing_df: DataFrame | None = None, + feature_columns: ArrayLike | list | None = None, + target_column: ArrayLike | str | None = None, +) -> TestSuiteReturnType | None: + from vectice import Table + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None: + return None + + # Define common PII patterns + pii_patterns = { + "name": r"\b[A-Z][a-z]*\b", + "email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b", + "phone": r"\b(\+?[\d]{1,3}[-.\s]?[\d]{1,4}[-.\s]?[\d]{1,4}[-.\s]?[\d]{1,9})\b", + } + + # Initialize a dictionary to hold counts of PII matches + pii_counts = {key: 0 for key in pii_patterns.keys()} + + # Check each column in the DataFrame for PII patterns + for column in dataset.columns: + for key, pattern in pii_patterns.items(): + pii_counts[key] += ( + dataset[column] + .astype(str) + .str.contains(pattern, case=False, regex=True) + .sum() + ) + + # Create a DataFrame with the results + pii_counts_df = pd.DataFrame( + {"PII Type": list(pii_counts.keys()), "Count": list(pii_counts.values())} + ) + + table = Table(pii_counts_df) + + return TestSuiteReturnType( + properties={}, + tables=[table], + attachments=[], + ) + + +def sensitive_data_type_check( + dataset: DataFrame | None = None, + training_df: DataFrame | None = None, + testing_df: DataFrame | None = None, + feature_columns: ArrayLike | list | None = None, + target_column: ArrayLike | str | None = None, +) -> TestSuiteReturnType | None: + from vectice import Table + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None: + return None + + # Define patterns for sensitive data types + sensitive_data_patterns = { + "credit_card": r"\b(?:\d[ -]*?){13,16}\b", + "ssn": r"\b\d{3}-\d{2}-\d{4}\b", + } + + # Initialize a dictionary to hold counts of sensitive data type matches + sensitive_data_counts = {key: 0 for key in sensitive_data_patterns.keys()} + + # Check each column in the DataFrame for sensitive data type patterns + for column in dataset.columns: + for key, pattern in sensitive_data_patterns.items(): + sensitive_data_counts[key] += ( + dataset[column] + .astype(str) + .str.contains(pattern, case=False, regex=True) + .sum() + ) + + # Create a DataFrame with the results + sensitive_data_counts_df = pd.DataFrame( + { + "Sensitive Data Type": list(sensitive_data_counts.keys()), + "Count": list(sensitive_data_counts.values()), + } + ) + + table = Table(sensitive_data_counts_df) + + return TestSuiteReturnType( + 
properties={}, + tables=[table], + attachments=[], + ) diff --git a/Validation test/test_suites_config/test_modules/data_quality_modules.py b/Validation test/test_suites_config/test_modules/data_quality_modules.py new file mode 100644 index 0000000..05b3ae5 --- /dev/null +++ b/Validation test/test_suites_config/test_modules/data_quality_modules.py @@ -0,0 +1,116 @@ +# Write custom tests which can be used to validate your datasets quality +from __future__ import annotations + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from pandas import DataFrame +from vectice.models.validation_dataset import TestSuiteReturnType + + +# custom test which can be used for dataset validation +def test_dataset_split( + dataset: DataFrame | None, + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + feature_columns: list | None = None, + threshold: float | None = None, +) -> TestSuiteReturnType: + from vectice import Table + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None: + return None + + total_df = len(training_df) + len(testing_df) + + # Create a DataFrame with the results + datasplit_df = pd.DataFrame( + { + "Dataset": ["Train", "Test", "Total"], + "Size": [len(training_df), len(testing_df), total_df], + "Percentage": [ + (len(training_df) / total_df * 100), + (len(testing_df) / total_df * 100), + 100, + ], + } + ) + + table = Table(datasplit_df) + + return TestSuiteReturnType( + properties={}, + tables=[table], + attachments=[], + ) + + +# custom test which can be used for dataset validation +def iqr_and_outliers( + dataset: DataFrame | None = None, + training_df: DataFrame | None = None, + testing_df: DataFrame | None = None, + feature_columns: list | None = None, + target_column: str | None = None, + threshold: float | None = None, +) -> TestSuiteReturnType | None: + from vectice.models.validation_dataset import TestSuiteReturnType + + if dataset is None: + return None + + files = [] + # disable plots showing + plt.ioff() + for column in dataset.select_dtypes(include=[np.number]).columns: + file_name = f"iqr_and_outliers_{column}.png" + + temp_file_path = file_name + + Q1 = dataset[column].quantile(0.25) + Q3 = dataset[column].quantile(0.75) + IQR = Q3 - Q1 + lower_bound = Q1 - 1.5 * IQR + upper_bound = Q3 + 1.5 * IQR + + plt.figure(figsize=(10, 6)) + plt.hist(dataset[column], bins=20, edgecolor="k", alpha=0.7) + plt.axvline( + Q1, color="r", linestyle="--", label=f"Q1 (25th percentile): {Q1:.2f}" + ) + plt.axvline( + Q3, color="b", linestyle="--", label=f"Q3 (75th percentile): {Q3:.2f}" + ) + plt.axvline( + dataset[column].median(), + color="g", + linestyle="-", + label=f"Median: {dataset[column].median():.2f}", + ) + plt.fill_betweenx( + [0, plt.ylim()[1]], Q1, Q3, color="gray", alpha=0.3, label=f"IQR: {IQR:.2f}" + ) + + # Highlight outliers + outliers = dataset[ + (dataset[column] < lower_bound) | (dataset[column] > upper_bound) + ][column] + plt.scatter( + outliers, [0] * len(outliers), color="red", label="Outliers", zorder=5 + ) + + plt.title(f"Histogram with IQR and Outliers for {column}") + plt.xlabel(column) + plt.ylabel("Frequency") + plt.legend() + plt.savefig(temp_file_path, bbox_inches="tight") + files.append(temp_file_path) + + plt.ion() + return TestSuiteReturnType( + properties={}, + tables=[], + attachments=files, + ) diff --git a/Validation test/test_suites_config/test_modules/default_tests_vectice.py b/Validation test/test_suites_config/test_modules/default_tests_vectice.py new file mode 100644 
index 0000000..5ec37db --- /dev/null +++ b/Validation test/test_suites_config/test_modules/default_tests_vectice.py @@ -0,0 +1,523 @@ +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any, Dict + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +import shap +from scipy.stats import chi2_contingency, ks_2samp +from sklearn.metrics import auc, confusion_matrix, precision_score, recall_score, roc_curve + +if TYPE_CHECKING: + from matplotlib.container import BarContainer + from numpy import ndarray + from numpy.typing import ArrayLike + from pandas import DataFrame + + from vectice.models.validation import TestSuiteReturnType + +_logger = logging.getLogger(__name__) + + +def plot_roc_curve( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {"train_color": "green", "test_color": "blue", "threshold": 0.5}, +) -> TestSuiteReturnType | None: + from vectice.models.validation import TestSuiteReturnType + + X_train = training_df.drop(columns=[target_column]) + X_test = testing_df.drop(columns=[target_column]) + training_prediction_proba = predictor.predict_proba(X_train)[:, 1] + testing_prediction_proba = predictor.predict_proba(X_test)[:, 1] + + if predict_proba_train is not None: + training_prediction_proba = predict_proba_train + + if predict_proba_test is not None: + testing_prediction_proba = predict_proba_test + + fpr_train, tpr_train, _ = roc_curve(training_df[target_column], training_prediction_proba) + roc_auc_train = auc(fpr_train, tpr_train) + + fpr_test, tpr_test, _ = roc_curve(testing_df[target_column], testing_prediction_proba) + roc_auc_test = auc(fpr_test, tpr_test) + + file_path = "ROC_CURVE.png" + + plt.figure(figsize=(8, 6)) + plt.plot( + fpr_train, + tpr_train, + color=internal_parameters["train_color"], + linestyle="--", + label=f"Train ROC curve (AUC = {roc_auc_train:.2f})", + ) + plt.plot( + fpr_test, + tpr_test, + color=internal_parameters["test_color"], + label=f"Test ROC curve (AUC = {roc_auc_test:.2f})", + ) + plt.plot([0, 1], [0, 1], color="red", linestyle="--") + plt.xlabel("False Positive Rate") + plt.ylabel("True Positive Rate") + plt.title("Receiver Operating Characteristic (ROC) Curve") + plt.legend() + plt.grid(True) + plt.savefig(file_path) + plt.close() + + return TestSuiteReturnType( + metrics={"_ROC_auc_train": roc_auc_train, "_ROC_auc_test": roc_auc_test}, + properties={}, + tables=[], + attachments=[file_path], + ) + + +def conf_matrix( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {"threshold": 0.5, "cmap": "Blues"}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + + threshold = internal_parameters["threshold"] + cmap = internal_parameters.get("cmap", "Blues") + + X_test = testing_df.drop(columns=[target_column]) + testing_prediction_proba = predictor.predict_proba(X_test)[:, 1] + + if predict_proba_test is not None: + testing_prediction_proba = predict_proba_test + + testing_prediction = (testing_prediction_proba >= threshold).astype(int) + + cm = confusion_matrix(testing_df[target_column], testing_prediction) + total_samples = np.sum(cm) + + precision = precision_score(testing_df[target_column], 
testing_prediction) + recall = recall_score(testing_df[target_column], testing_prediction) + + # Plot confusion matrix + plt.figure(figsize=(10, 8)) + sns.heatmap(cm, annot=True, cmap=cmap, fmt="d", annot_kws={"fontsize": 12}, cbar=False) + for i in range(len(cm)): + for j in range(len(cm)): + plt.text( + j + 0.5, + i + 0.75, + f"{cm[i][j]/total_samples*100:.2f}%", + ha="center", + va="center", + color="black", + fontsize=12, + ) + plt.xlabel("Predicted Label") + plt.ylabel("True Label") + plt.title(f"Confusion Matrix\nPrecision: {precision:.2f}, Recall: {recall:.2f}") + + # Save the plot + file_path = "Confusion_matrix_plot.png" + plt.savefig(file_path) + plt.close() + + return TestSuiteReturnType( + metrics={"_precision_test": precision, "_recall_test": recall}, + properties={"Threshold": threshold}, + tables=[], + attachments=[file_path], + ) + + +def explainability( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + + explainer = shap.Explainer(predictor, training_df.drop(columns=[target_column])) + shap_values = explainer(training_df.drop(columns=[target_column]).head(1000)) + shap.summary_plot( + shap_values[:, :, 0], training_df.drop(columns=[target_column]).head(1000), max_display=10, show=False + ) + summary_plot_path = "SHAP_summary_plot.png" + plt.savefig(summary_plot_path, bbox_inches="tight") + plt.close() + + return TestSuiteReturnType(metrics={}, properties={}, tables=[], attachments=[summary_plot_path]) + + +def feature_importance( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + + explainer = shap.Explainer(predictor, training_df.drop(columns=[target_column])) + shap_values = explainer(training_df.drop(columns=[target_column]).head(1000)) + clustering = shap.utils.hclust( + training_df.drop(columns=[target_column]).head(1000), training_df[target_column].head(1000) + ) + shap.plots.bar(shap_values[:, :, 0], clustering=clustering, max_display=10, show=False) + + feature_importance_path = "feature_importance.png" + plt.savefig(feature_importance_path, bbox_inches="tight") + plt.close() + + return TestSuiteReturnType(metrics={}, properties={}, tables=[], attachments=[feature_importance_path]) + + +def cramers_v_score(x: ndarray[Any, Any], y: ndarray[Any, Any]) -> float: + + min_length = min(len(x), len(y), 4000) + x = x[:min_length] + y = y[:min_length] + confusion_matrix = pd.crosstab(x, y) + chi2 = chi2_contingency(confusion_matrix)[0] + n = confusion_matrix.sum().sum() + phi2 = chi2 / n + r, k = confusion_matrix.shape + phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1)) + rcorr = r - ((r - 1) ** 2) / (n - 1) + kcorr = k - ((k - 1) ** 2) / (n - 1) + return np.sqrt(phi2corr / min((kcorr - 1), (rcorr - 1))) + + +def ks_score(x: ndarray[Any, Any], y: ndarray[Any, Any]) -> float: + min_length = min(len(x), len(y), 4000) + x = x[:min_length] + y = y[:min_length] + ks_statistic, _ = ks_2samp(x, y) + + return ks_statistic + + +def prediction_drift( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | 
None, + predict_proba_test: ArrayLike | None, + threshold: float, + internal_parameters: Dict[str, Any] = {}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + + X_train = training_df.drop(columns=[target_column]) + X_test = testing_df.drop(columns=[target_column]) + training_prediction_proba = predictor.predict_proba(X_train)[:, 1] + testing_prediction_proba = predictor.predict_proba(X_test)[:, 1] + + if predict_proba_train is not None: + training_prediction_proba = predict_proba_train + + if predict_proba_test is not None: + testing_prediction_proba = predict_proba_test + + train_predictions = np.array(training_prediction_proba) + test_predictions = np.array(testing_prediction_proba) + + light_red = "#FF8A80" # Light Red + darker_blue = "#1565C0" # Darker Blue + sns.set_palette([darker_blue, light_red]) + + _, ax = plt.subplots(figsize=(8, 6)) + + sns.kdeplot(train_predictions, color=light_red, label="Train Predictions", fill=True) + sns.kdeplot(test_predictions, color=darker_blue, label="Test Predictions", fill=True) + + # Plot vertical lines for means using the specified colors + ax.axvline( # pyright: ignore[reportAttributeAccessIssue] + np.mean(train_predictions), # pyright: ignore[reportArgumentType] + color=light_red, + linestyle="--", + label="Train Mean", + ) + ax.axvline( # pyright: ignore[reportAttributeAccessIssue] + np.mean(test_predictions), # pyright: ignore[reportArgumentType] + color=darker_blue, + linestyle="--", + label="Test Mean", + ) + + plt.xlabel("Predictions") + plt.ylabel("Density") + plt.title("Prediction Drift Plot (Kolmogorov-Smirnov drift score)") + plt.legend() + plt.grid(True) + path = "Prediction_drift.png" + + # Calculate and print drift score + drift_score = ks_score(train_predictions, test_predictions) + + # Set text position at the top + text_x = 0.5 + text_y = 0.95 + if drift_score < 0.1: + score_color = "green" + elif 0.1 <= drift_score <= 0.2: + score_color = "orange" + else: + score_color = "red" + + plt.text( + text_x, + text_y, + f"Drift score = {drift_score:.2f}", + ha="center", + va="top", + color=score_color, + transform=ax.transAxes, # pyright: ignore[reportAttributeAccessIssue] + ) + + plt.savefig(path, bbox_inches="tight") + plt.close() + + return TestSuiteReturnType( + metrics={}, properties={"_prediction_drift_score": drift_score}, tables=[], attachments=[path] + ) + + +def label_drift( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + + train_labels = np.array(training_df[target_column]) + test_labels = np.array(testing_df[target_column]) + + light_red = "#FF8A80" # Light Red + darker_blue = "#1565C0" # Darker Blue + sns.set_palette([darker_blue, light_red]) + + _, ax = plt.subplots(figsize=(8, 6)) + + bar_width = 0.35 + index = np.arange(2) + + train_counts = [np.sum(train_labels == 0) / len(train_labels), np.sum(train_labels == 1) / len(train_labels)] + test_counts = [np.sum(test_labels == 0) / len(test_labels), np.sum(test_labels == 1) / len(test_labels)] + + train_bar = ax.bar( # pyright: ignore[reportAttributeAccessIssue] + index, train_counts, bar_width, label="Train Labels" + ) + test_bar = ax.bar( # pyright: ignore[reportAttributeAccessIssue] + index + bar_width, test_counts, bar_width, label="Test Labels" + ) + + ax.set_xlabel("Labels") # 
pyright: ignore[reportAttributeAccessIssue] + ax.set_ylabel("Frequency") # pyright: ignore[reportAttributeAccessIssue] + ax.set_title("Label Drift Plot (Cramer's V drift score)") # pyright: ignore[reportAttributeAccessIssue] + ax.set_xticks(index + bar_width / 2) # pyright: ignore[reportAttributeAccessIssue] + ax.set_xticklabels(["0", "1"]) # pyright: ignore[reportAttributeAccessIssue] + ax.legend() # pyright: ignore[reportAttributeAccessIssue] + + def autolabel(bars: BarContainer): + """Attach a text label above each bar in *bars*, displaying its height.""" + for bar in bars: + height = bar.get_height() + ax.annotate( # pyright: ignore[reportAttributeAccessIssue] + f"{height:.2f}", + xy=(bar.get_x() + bar.get_width() / 2, height), + xytext=(0, 3), + textcoords="offset points", + ha="center", + va="bottom", + ) + + autolabel(train_bar) + autolabel(test_bar) + + drift_score = cramers_v_score(train_labels, test_labels) + if drift_score < 0.1: + score_color = "green" + elif 0.1 <= drift_score <= 0.2: + score_color = "orange" + else: + score_color = "red" + + ax.text( # pyright: ignore[reportAttributeAccessIssue] + 0.5, + 0.95, + f"Drift score = {drift_score:.2f}", + ha="center", + va="top", + color=score_color, + transform=ax.transAxes, # pyright: ignore[reportAttributeAccessIssue] + ) + + plt.tight_layout() + path = "Label_drift.png" + plt.savefig(path, bbox_inches="tight") + plt.close() + + return TestSuiteReturnType( + metrics={}, properties={"_label_drift_score": drift_score}, tables=[], attachments=[path] + ) + + +def plot_correlation_matrix( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, +) -> TestSuiteReturnType: + from vectice.models.validation import TestSuiteReturnType + + subset_columns = internal_parameters.get( + "subset_columns", [target_column] + [col for col in training_df.columns[:10] if col != "TARGET"] + ) + cmap = internal_parameters.get("cmap", "Blues") + + # Select subset of columns + training_df = training_df[subset_columns] + + # Calculate the correlation matrix + corr_matrix = training_df.corr() + + # Plot the correlation matrix + plt.figure(figsize=(10, 8)) + sns.heatmap(corr_matrix, annot=True, cmap=cmap, fmt=".2f", annot_kws={"fontsize": 12}, cbar=True) + plt.title("Correlation Matrix") + + # Save the plot + file_path = "Correlation_matrix_plot.png" + plt.savefig(file_path, bbox_inches="tight") + plt.close() + + return TestSuiteReturnType( + metrics={}, + properties={}, + tables=[], + attachments=[file_path], + ) + + +# custom test which can be used for dataset validation +def test_dataset_split( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, +) -> TestSuiteReturnType: + from vectice import Table + from vectice.models.validation import TestSuiteReturnType + + total_df = len(training_df) + len(testing_df) + + # Create a DataFrame with the results + datasplit_df = pd.DataFrame( + { + "Dataset": ["Train", "Test", "Total"], + "Size": [len(training_df), len(testing_df), total_df], + "Percentage": [ + (len(training_df) / total_df * 100), + (len(testing_df) / total_df * 100), + 100, + ], + } + ) + + table = Table(datasplit_df) + + return 
TestSuiteReturnType(metrics={}, properties={}, tables=[table], attachments=[]) + + +# custom test which can be used for dataset validation +def iqr_and_outliers( + training_df: DataFrame, + testing_df: DataFrame, + target_column: str, + predictor: Any, + predict_proba_train: ArrayLike | None, + predict_proba_test: ArrayLike | None, + internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, +) -> TestSuiteReturnType | None: + from vectice.models.validation import TestSuiteReturnType + + dataset = training_df + + files = [] + # disable plots showing + if internal_parameters.get("subset_columns") is not None: + columns = internal_parameters.get("subset_columns") + else: + columns = dataset.select_dtypes(include=[np.number]).columns[:10] + plt.ioff() + for column in columns: # type: ignore + file_name = f"iqr_and_outliers_{column}.png" + + temp_file_path = file_name + + Q1 = dataset[column].quantile(0.25) + Q3 = dataset[column].quantile(0.75) + IQR = Q3 - Q1 + lower_bound = Q1 - 1.5 * IQR + upper_bound = Q3 + 1.5 * IQR + + plt.figure(figsize=(10, 6)) + plt.hist(dataset[column], bins=20, edgecolor="k", alpha=0.7) + plt.axvline(Q1, color="r", linestyle="--", label=f"Q1 (25th percentile): {Q1:.2f}") + plt.axvline(Q3, color="b", linestyle="--", label=f"Q3 (75th percentile): {Q3:.2f}") + plt.axvline( + dataset[column].median(), + color="g", + linestyle="-", + label=f"Median: {dataset[column].median():.2f}", + ) + plt.fill_betweenx([0, plt.ylim()[1]], Q1, Q3, color="gray", alpha=0.3, label=f"IQR: {IQR:.2f}") + + # Highlight outliers + outliers = dataset[(dataset[column] < lower_bound) | (dataset[column] > upper_bound)][column] + plt.scatter(outliers, [0] * len(outliers), color="red", label="Outliers", zorder=5) + + plt.title(f"Histogram with IQR and Outliers for {column}") + plt.xlabel(column) + plt.ylabel("Frequency") + plt.legend() + plt.savefig(temp_file_path, bbox_inches="tight") + files.append(temp_file_path) + + plt.ion() + return TestSuiteReturnType( + metrics={}, + properties={}, + tables=[], + attachments=files, + ) \ No newline at end of file From 33e485cd7dada0265530892c3d0180a82120569a Mon Sep 17 00:00:00 2001 From: BDaversa Date: Thu, 11 Jul 2024 17:44:24 -0700 Subject: [PATCH 19/20] change path --- {Validation test/test_suites_config => Validation}/README.md | 0 .../master_config_test_suites.py | 0 .../test_modules/correlation_matrix_module.py | 0 .../test_modules/data_privacy_modules.py | 0 .../test_modules/data_quality_modules.py | 0 .../test_modules/default_tests_vectice.py | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename {Validation test/test_suites_config => Validation}/README.md (100%) rename {Validation test/test_suites_config => Validation}/master_config_test_suites.py (100%) rename {Validation test/test_suites_config => Validation}/test_modules/correlation_matrix_module.py (100%) rename {Validation test/test_suites_config => Validation}/test_modules/data_privacy_modules.py (100%) rename {Validation test/test_suites_config => Validation}/test_modules/data_quality_modules.py (100%) rename {Validation test/test_suites_config => Validation}/test_modules/default_tests_vectice.py (100%) diff --git a/Validation test/test_suites_config/README.md b/Validation/README.md similarity index 100% rename from Validation test/test_suites_config/README.md rename to Validation/README.md diff --git a/Validation test/test_suites_config/master_config_test_suites.py b/Validation/master_config_test_suites.py similarity index 100% rename from Validation 
test/test_suites_config/master_config_test_suites.py rename to Validation/master_config_test_suites.py diff --git a/Validation test/test_suites_config/test_modules/correlation_matrix_module.py b/Validation/test_modules/correlation_matrix_module.py similarity index 100% rename from Validation test/test_suites_config/test_modules/correlation_matrix_module.py rename to Validation/test_modules/correlation_matrix_module.py diff --git a/Validation test/test_suites_config/test_modules/data_privacy_modules.py b/Validation/test_modules/data_privacy_modules.py similarity index 100% rename from Validation test/test_suites_config/test_modules/data_privacy_modules.py rename to Validation/test_modules/data_privacy_modules.py diff --git a/Validation test/test_suites_config/test_modules/data_quality_modules.py b/Validation/test_modules/data_quality_modules.py similarity index 100% rename from Validation test/test_suites_config/test_modules/data_quality_modules.py rename to Validation/test_modules/data_quality_modules.py diff --git a/Validation test/test_suites_config/test_modules/default_tests_vectice.py b/Validation/test_modules/default_tests_vectice.py similarity index 100% rename from Validation test/test_suites_config/test_modules/default_tests_vectice.py rename to Validation/test_modules/default_tests_vectice.py From 6868c9bf0b12d6309a3735dfaf36976c51d2c6fc Mon Sep 17 00:00:00 2001 From: BDaversa Date: Sun, 14 Jul 2024 14:37:57 -0700 Subject: [PATCH 20/20] save wrappers --- 24.2/samples/test_suites_config/README.md | 28 - .../master_config_test_suites.py | 65 --- .../test_modules/correlation_matrix_module.py | 58 -- .../test_modules/data_privacy_modules.py | 148 ----- .../test_modules/data_quality_modules.py | 116 ---- .../test_modules/default_tests_vectice.py | 523 ------------------ Validation/vectice_wrappers.py | 73 +++ 7 files changed, 73 insertions(+), 938 deletions(-) delete mode 100644 24.2/samples/test_suites_config/README.md delete mode 100644 24.2/samples/test_suites_config/master_config_test_suites.py delete mode 100644 24.2/samples/test_suites_config/test_modules/correlation_matrix_module.py delete mode 100644 24.2/samples/test_suites_config/test_modules/data_privacy_modules.py delete mode 100644 24.2/samples/test_suites_config/test_modules/data_quality_modules.py delete mode 100644 24.2/samples/test_suites_config/test_modules/default_tests_vectice.py create mode 100644 Validation/vectice_wrappers.py diff --git a/24.2/samples/test_suites_config/README.md b/24.2/samples/test_suites_config/README.md deleted file mode 100644 index 349c78f..0000000 --- a/24.2/samples/test_suites_config/README.md +++ /dev/null @@ -1,28 +0,0 @@ -## List of validation tests provided by Vectice (source code from PiML) -| **Category** | **Test Name** | **Function** | -|------------------------------|----------------------------------|--------------------------------------| -| **Classification Tests** | ROC Curve | `plot_roc_curve` | -| | Confusion Matrix | `conf_matrix` | -| | Explainability | `explainability` | -| | Feature Importance | `feature_importance` | -| | Label Drift | `label_drift` | -| | Prediction Drift | `prediction_drift` | -| | Recall by class | `recall_by_class ` | -| | Precision by class | `precision_by_class ` | -| | **Binary Classification suite** | `plot_roc_curve`, `conf_matrix`, `explainability`, `feature_importance`, `label_drift`, `prediction_drift` | -| | **Multiclass Classification suite** | `plot_roc_curve`, `conf_matrix`, `explainability`, `feature_importance`, `label_drift`, 
`prediction_drift`, `recall_by_class `, `precision_by_class ` | -| **Data Privacy Tests** | Sensitive Data Check | `sensitive_data_check` | -| | PII Check | `pii_check` | -| | Sensitive Data Type Check | `sensitive_data_type_check` | -| **Data Quality Tests** | Dataset Split Validation | `test_dataset_split` | -| | IQR and Outliers | `iqr_and_outliers` | -| | **Dataset Quality suite** | `test_dataset_split`, `iqr_and_outliers` | -| **Regression Tests** | Residuals Plot | `plot_residuals` | -| | R² Score | `r2_score` | -| | Explainability | `explainability` | -| | Feature Importance | `feature_importance` | -| | Target Drift | `target_drift` | -| | Prediction Drift | `prediction_drift` | -| | **Regression suite** | `plot_residuals`, `r2_score`, `explainability`, `feature_importance`, `target_drift`, `prediction_drift` | - - diff --git a/24.2/samples/test_suites_config/master_config_test_suites.py b/24.2/samples/test_suites_config/master_config_test_suites.py deleted file mode 100644 index 24a5005..0000000 --- a/24.2/samples/test_suites_config/master_config_test_suites.py +++ /dev/null @@ -1,65 +0,0 @@ -# import the Vectice provided probability of default validation tests -from vectice.models.test_library.binary_classification_test import ( - plot_roc_curve, - conf_matrix, - explainability, - feature_importance, - label_drift, - prediction_drift, -) - - -# custom data quality validation tests -from test_modules.data_quality_modules import ( - test_dataset_split, - iqr_and_outliers, -) - -# custom data privacy validation tests -from test_modules.data_privacy_modules import ( - sensitive_data_check, - sensitive_data_type_check, - pii_check, -) - -from test_modules.correlation_matrix_module import ( - plot_correlation_matrix -) - - -# The master test suite file is used to map all ADDITIONAL suite of test which can be run. -# The tests can be provided by Vectice or custom functions from your modules. -# Vectice uses this configuration to simply identify and bundle available tests into suite, when you run -# your validations in your notebook. 
- -# Accumulation and mapping of all validation tests to be run for the PD model suite -PD_model_suite= { - "binary_suite": [ - plot_roc_curve, - conf_matrix, - explainability, - feature_importance, - label_drift, - prediction_drift, - ], - "data_quality_ext": [ - test_dataset_split, - iqr_and_outliers, - plot_correlation_matrix, - ], - "corr_matrix_ext": [ - plot_correlation_matrix, - ], -} - -# Map the tests to be used for data privacy validation -Robustness_suite = { - "sensitive_data_check": sensitive_data_check, - "pii_check": pii_check, - "sensitive_data_type_check": sensitive_data_type_check, - "data_privacy_full_suite": [ - sensitive_data_check, - pii_check, - sensitive_data_type_check, - ], -} diff --git a/24.2/samples/test_suites_config/test_modules/correlation_matrix_module.py b/24.2/samples/test_suites_config/test_modules/correlation_matrix_module.py deleted file mode 100644 index 3935f9a..0000000 --- a/24.2/samples/test_suites_config/test_modules/correlation_matrix_module.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import annotations - -import logging -from typing import TYPE_CHECKING, Any, Dict - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sns - - -if TYPE_CHECKING: - from matplotlib.container import BarContainer - from numpy import ndarray - from numpy.typing import ArrayLike - from pandas import DataFrame - - from vectice.models.validation import TestSuiteReturnType - -_logger = logging.getLogger(__name__) - -def plot_correlation_matrix( - training_df: DataFrame, - testing_df: DataFrame, - target_column: str, - predictor: Any, - predict_proba_train: ArrayLike | None, - predict_proba_test: ArrayLike | None, - internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, -) -> TestSuiteReturnType: - from vectice.models.validation import TestSuiteReturnType - - subset_columns = internal_parameters.get("subset_columns", [target_column] + [col for col in training_df.columns[:10] if col != "TARGET"]) - cmap = internal_parameters.get("cmap", "Blues") - - # Select subset of columns - training_df = training_df[subset_columns] - - # Calculate the correlation matrix - corr_matrix = training_df.corr() - - # Plot the correlation matrix - plt.figure(figsize=(10, 8)) - sns.heatmap(corr_matrix, annot=True, cmap=cmap, fmt=".2f", annot_kws={"fontsize": 12}, cbar=True) - plt.title("Correlation Matrix") - - # Save the plot - file_path = "Correlation_matrix_plot.png" - plt.savefig(file_path) - plt.close() - - # RETURN IN THE VECTICE EXPECTED FORMART - return TestSuiteReturnType( - metrics={}, - properties={}, - tables=[], - attachments=[file_path], - ) \ No newline at end of file diff --git a/24.2/samples/test_suites_config/test_modules/data_privacy_modules.py b/24.2/samples/test_suites_config/test_modules/data_privacy_modules.py deleted file mode 100644 index 90d851f..0000000 --- a/24.2/samples/test_suites_config/test_modules/data_privacy_modules.py +++ /dev/null @@ -1,148 +0,0 @@ -# Write custom tests which can be used to validate your datasets security -from __future__ import annotations - -from typing import TYPE_CHECKING - -import pandas as pd - -if TYPE_CHECKING: - from numpy.typing import ArrayLike - from pandas import DataFrame - - from vectice.models.validation_dataset import TestSuiteReturnType - - -def sensitive_data_check( - dataset: DataFrame | None = None, - training_df: DataFrame | None = None, - testing_df: DataFrame | None = None, - feature_columns: ArrayLike | list | None = None, - target_column: 
ArrayLike | str | None = None, - sensitive_keywords: list | None = None, -) -> TestSuiteReturnType | None: - from vectice import Table - from vectice.models.validation_dataset import TestSuiteReturnType - - if dataset is None or sensitive_keywords is None: - return None - - # Initialize a dictionary to hold counts of sensitive data - sensitive_counts = {keyword: 0 for keyword in sensitive_keywords} - - # Check each cell in the DataFrame for sensitive keywords - for keyword in sensitive_keywords: - sensitive_counts[keyword] = dataset.apply( - lambda x: x.astype(str).str.contains(keyword, case=False).sum() - ).sum() - - # Create a DataFrame with the results - sensitive_counts_df = pd.DataFrame( - { - "Sensitive Keyword": list(sensitive_counts.keys()), - "Count": list(sensitive_counts.values()), - } - ) - - table = Table(sensitive_counts_df) - - return TestSuiteReturnType( - properties={}, - tables=[table], - attachments=[], - ) - - -def pii_check( - dataset: DataFrame | None = None, - training_df: DataFrame | None = None, - testing_df: DataFrame | None = None, - feature_columns: ArrayLike | list | None = None, - target_column: ArrayLike | str | None = None, -) -> TestSuiteReturnType | None: - from vectice import Table - from vectice.models.validation_dataset import TestSuiteReturnType - - if dataset is None: - return None - - # Define common PII patterns - pii_patterns = { - "name": r"\b[A-Z][a-z]*\b", - "email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b", - "phone": r"\b(\+?[\d]{1,3}[-.\s]?[\d]{1,4}[-.\s]?[\d]{1,4}[-.\s]?[\d]{1,9})\b", - } - - # Initialize a dictionary to hold counts of PII matches - pii_counts = {key: 0 for key in pii_patterns.keys()} - - # Check each column in the DataFrame for PII patterns - for column in dataset.columns: - for key, pattern in pii_patterns.items(): - pii_counts[key] += ( - dataset[column] - .astype(str) - .str.contains(pattern, case=False, regex=True) - .sum() - ) - - # Create a DataFrame with the results - pii_counts_df = pd.DataFrame( - {"PII Type": list(pii_counts.keys()), "Count": list(pii_counts.values())} - ) - - table = Table(pii_counts_df) - - return TestSuiteReturnType( - properties={}, - tables=[table], - attachments=[], - ) - - -def sensitive_data_type_check( - dataset: DataFrame | None = None, - training_df: DataFrame | None = None, - testing_df: DataFrame | None = None, - feature_columns: ArrayLike | list | None = None, - target_column: ArrayLike | str | None = None, -) -> TestSuiteReturnType | None: - from vectice import Table - from vectice.models.validation_dataset import TestSuiteReturnType - - if dataset is None: - return None - - # Define patterns for sensitive data types - sensitive_data_patterns = { - "credit_card": r"\b(?:\d[ -]*?){13,16}\b", - "ssn": r"\b\d{3}-\d{2}-\d{4}\b", - } - - # Initialize a dictionary to hold counts of sensitive data type matches - sensitive_data_counts = {key: 0 for key in sensitive_data_patterns.keys()} - - # Check each column in the DataFrame for sensitive data type patterns - for column in dataset.columns: - for key, pattern in sensitive_data_patterns.items(): - sensitive_data_counts[key] += ( - dataset[column] - .astype(str) - .str.contains(pattern, case=False, regex=True) - .sum() - ) - - # Create a DataFrame with the results - sensitive_data_counts_df = pd.DataFrame( - { - "Sensitive Data Type": list(sensitive_data_counts.keys()), - "Count": list(sensitive_data_counts.values()), - } - ) - - table = Table(sensitive_data_counts_df) - - return TestSuiteReturnType( - properties={}, - 
tables=[table], - attachments=[], - ) diff --git a/24.2/samples/test_suites_config/test_modules/data_quality_modules.py b/24.2/samples/test_suites_config/test_modules/data_quality_modules.py deleted file mode 100644 index 05b3ae5..0000000 --- a/24.2/samples/test_suites_config/test_modules/data_quality_modules.py +++ /dev/null @@ -1,116 +0,0 @@ -# Write custom tests which can be used to validate your datasets quality -from __future__ import annotations - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from pandas import DataFrame -from vectice.models.validation_dataset import TestSuiteReturnType - - -# custom test which can be used for dataset validation -def test_dataset_split( - dataset: DataFrame | None, - training_df: DataFrame, - testing_df: DataFrame, - target_column: str, - feature_columns: list | None = None, - threshold: float | None = None, -) -> TestSuiteReturnType: - from vectice import Table - from vectice.models.validation_dataset import TestSuiteReturnType - - if dataset is None: - return None - - total_df = len(training_df) + len(testing_df) - - # Create a DataFrame with the results - datasplit_df = pd.DataFrame( - { - "Dataset": ["Train", "Test", "Total"], - "Size": [len(training_df), len(testing_df), total_df], - "Percentage": [ - (len(training_df) / total_df * 100), - (len(testing_df) / total_df * 100), - 100, - ], - } - ) - - table = Table(datasplit_df) - - return TestSuiteReturnType( - properties={}, - tables=[table], - attachments=[], - ) - - -# custom test which can be used for dataset validation -def iqr_and_outliers( - dataset: DataFrame | None = None, - training_df: DataFrame | None = None, - testing_df: DataFrame | None = None, - feature_columns: list | None = None, - target_column: str | None = None, - threshold: float | None = None, -) -> TestSuiteReturnType | None: - from vectice.models.validation_dataset import TestSuiteReturnType - - if dataset is None: - return None - - files = [] - # disable plots showing - plt.ioff() - for column in dataset.select_dtypes(include=[np.number]).columns: - file_name = f"iqr_and_outliers_{column}.png" - - temp_file_path = file_name - - Q1 = dataset[column].quantile(0.25) - Q3 = dataset[column].quantile(0.75) - IQR = Q3 - Q1 - lower_bound = Q1 - 1.5 * IQR - upper_bound = Q3 + 1.5 * IQR - - plt.figure(figsize=(10, 6)) - plt.hist(dataset[column], bins=20, edgecolor="k", alpha=0.7) - plt.axvline( - Q1, color="r", linestyle="--", label=f"Q1 (25th percentile): {Q1:.2f}" - ) - plt.axvline( - Q3, color="b", linestyle="--", label=f"Q3 (75th percentile): {Q3:.2f}" - ) - plt.axvline( - dataset[column].median(), - color="g", - linestyle="-", - label=f"Median: {dataset[column].median():.2f}", - ) - plt.fill_betweenx( - [0, plt.ylim()[1]], Q1, Q3, color="gray", alpha=0.3, label=f"IQR: {IQR:.2f}" - ) - - # Highlight outliers - outliers = dataset[ - (dataset[column] < lower_bound) | (dataset[column] > upper_bound) - ][column] - plt.scatter( - outliers, [0] * len(outliers), color="red", label="Outliers", zorder=5 - ) - - plt.title(f"Histogram with IQR and Outliers for {column}") - plt.xlabel(column) - plt.ylabel("Frequency") - plt.legend() - plt.savefig(temp_file_path, bbox_inches="tight") - files.append(temp_file_path) - - plt.ion() - return TestSuiteReturnType( - properties={}, - tables=[], - attachments=files, - ) diff --git a/24.2/samples/test_suites_config/test_modules/default_tests_vectice.py b/24.2/samples/test_suites_config/test_modules/default_tests_vectice.py deleted file mode 100644 index 5ec37db..0000000 
--- a/24.2/samples/test_suites_config/test_modules/default_tests_vectice.py +++ /dev/null @@ -1,523 +0,0 @@ -from __future__ import annotations - -import logging -from typing import TYPE_CHECKING, Any, Dict - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sns -import shap -from scipy.stats import chi2_contingency, ks_2samp -from sklearn.metrics import auc, confusion_matrix, precision_score, recall_score, roc_curve - -if TYPE_CHECKING: - from matplotlib.container import BarContainer - from numpy import ndarray - from numpy.typing import ArrayLike - from pandas import DataFrame - - from vectice.models.validation import TestSuiteReturnType - -_logger = logging.getLogger(__name__) - - -def plot_roc_curve( - training_df: DataFrame, - testing_df: DataFrame, - target_column: str, - predictor: Any, - predict_proba_train: ArrayLike | None, - predict_proba_test: ArrayLike | None, - internal_parameters: Dict[str, Any] = {"train_color": "green", "test_color": "blue", "threshold": 0.5}, -) -> TestSuiteReturnType | None: - from vectice.models.validation import TestSuiteReturnType - - X_train = training_df.drop(columns=[target_column]) - X_test = testing_df.drop(columns=[target_column]) - training_prediction_proba = predictor.predict_proba(X_train)[:, 1] - testing_prediction_proba = predictor.predict_proba(X_test)[:, 1] - - if predict_proba_train is not None: - training_prediction_proba = predict_proba_train - - if predict_proba_test is not None: - testing_prediction_proba = predict_proba_test - - fpr_train, tpr_train, _ = roc_curve(training_df[target_column], training_prediction_proba) - roc_auc_train = auc(fpr_train, tpr_train) - - fpr_test, tpr_test, _ = roc_curve(testing_df[target_column], testing_prediction_proba) - roc_auc_test = auc(fpr_test, tpr_test) - - file_path = "ROC_CURVE.png" - - plt.figure(figsize=(8, 6)) - plt.plot( - fpr_train, - tpr_train, - color=internal_parameters["train_color"], - linestyle="--", - label=f"Train ROC curve (AUC = {roc_auc_train:.2f})", - ) - plt.plot( - fpr_test, - tpr_test, - color=internal_parameters["test_color"], - label=f"Test ROC curve (AUC = {roc_auc_test:.2f})", - ) - plt.plot([0, 1], [0, 1], color="red", linestyle="--") - plt.xlabel("False Positive Rate") - plt.ylabel("True Positive Rate") - plt.title("Receiver Operating Characteristic (ROC) Curve") - plt.legend() - plt.grid(True) - plt.savefig(file_path) - plt.close() - - return TestSuiteReturnType( - metrics={"_ROC_auc_train": roc_auc_train, "_ROC_auc_test": roc_auc_test}, - properties={}, - tables=[], - attachments=[file_path], - ) - - -def conf_matrix( - training_df: DataFrame, - testing_df: DataFrame, - target_column: str, - predictor: Any, - predict_proba_train: ArrayLike | None, - predict_proba_test: ArrayLike | None, - internal_parameters: Dict[str, Any] = {"threshold": 0.5, "cmap": "Blues"}, -) -> TestSuiteReturnType: - from vectice.models.validation import TestSuiteReturnType - - threshold = internal_parameters["threshold"] - cmap = internal_parameters.get("cmap", "Blues") - - X_test = testing_df.drop(columns=[target_column]) - testing_prediction_proba = predictor.predict_proba(X_test)[:, 1] - - if predict_proba_test is not None: - testing_prediction_proba = predict_proba_test - - testing_prediction = (testing_prediction_proba >= threshold).astype(int) - - cm = confusion_matrix(testing_df[target_column], testing_prediction) - total_samples = np.sum(cm) - - precision = precision_score(testing_df[target_column], testing_prediction) - recall = 
recall_score(testing_df[target_column], testing_prediction) - - # Plot confusion matrix - plt.figure(figsize=(10, 8)) - sns.heatmap(cm, annot=True, cmap=cmap, fmt="d", annot_kws={"fontsize": 12}, cbar=False) - for i in range(len(cm)): - for j in range(len(cm)): - plt.text( - j + 0.5, - i + 0.75, - f"{cm[i][j]/total_samples*100:.2f}%", - ha="center", - va="center", - color="black", - fontsize=12, - ) - plt.xlabel("Predicted Label") - plt.ylabel("True Label") - plt.title(f"Confusion Matrix\nPrecision: {precision:.2f}, Recall: {recall:.2f}") - - # Save the plot - file_path = "Confusion_matrix_plot.png" - plt.savefig(file_path) - plt.close() - - return TestSuiteReturnType( - metrics={"_precision_test": precision, "_recall_test": recall}, - properties={"Threshold": threshold}, - tables=[], - attachments=[file_path], - ) - - -def explainability( - training_df: DataFrame, - testing_df: DataFrame, - target_column: str, - predictor: Any, - predict_proba_train: ArrayLike | None, - predict_proba_test: ArrayLike | None, - internal_parameters: Dict[str, Any] = {}, -) -> TestSuiteReturnType: - from vectice.models.validation import TestSuiteReturnType - - explainer = shap.Explainer(predictor, training_df.drop(columns=[target_column])) - shap_values = explainer(training_df.drop(columns=[target_column]).head(1000)) - shap.summary_plot( - shap_values[:, :, 0], training_df.drop(columns=[target_column]).head(1000), max_display=10, show=False - ) - summary_plot_path = "SHAP_summary_plot.png" - plt.savefig(summary_plot_path, bbox_inches="tight") - plt.close() - - return TestSuiteReturnType(metrics={}, properties={}, tables=[], attachments=[summary_plot_path]) - - -def feature_importance( - training_df: DataFrame, - testing_df: DataFrame, - target_column: str, - predictor: Any, - predict_proba_train: ArrayLike | None, - predict_proba_test: ArrayLike | None, - internal_parameters: Dict[str, Any] = {}, -) -> TestSuiteReturnType: - from vectice.models.validation import TestSuiteReturnType - - explainer = shap.Explainer(predictor, training_df.drop(columns=[target_column])) - shap_values = explainer(training_df.drop(columns=[target_column]).head(1000)) - clustering = shap.utils.hclust( - training_df.drop(columns=[target_column]).head(1000), training_df[target_column].head(1000) - ) - shap.plots.bar(shap_values[:, :, 0], clustering=clustering, max_display=10, show=False) - - feature_importance_path = "feature_importance.png" - plt.savefig(feature_importance_path, bbox_inches="tight") - plt.close() - - return TestSuiteReturnType(metrics={}, properties={}, tables=[], attachments=[feature_importance_path]) - - -def cramers_v_score(x: ndarray[Any, Any], y: ndarray[Any, Any]) -> float: - - min_length = min(len(x), len(y), 4000) - x = x[:min_length] - y = y[:min_length] - confusion_matrix = pd.crosstab(x, y) - chi2 = chi2_contingency(confusion_matrix)[0] - n = confusion_matrix.sum().sum() - phi2 = chi2 / n - r, k = confusion_matrix.shape - phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1)) - rcorr = r - ((r - 1) ** 2) / (n - 1) - kcorr = k - ((k - 1) ** 2) / (n - 1) - return np.sqrt(phi2corr / min((kcorr - 1), (rcorr - 1))) - - -def ks_score(x: ndarray[Any, Any], y: ndarray[Any, Any]) -> float: - min_length = min(len(x), len(y), 4000) - x = x[:min_length] - y = y[:min_length] - ks_statistic, _ = ks_2samp(x, y) - - return ks_statistic - - -def prediction_drift( - training_df: DataFrame, - testing_df: DataFrame, - target_column: str, - predictor: Any, - predict_proba_train: ArrayLike | None, - predict_proba_test: 
ArrayLike | None, - threshold: float, - internal_parameters: Dict[str, Any] = {}, -) -> TestSuiteReturnType: - from vectice.models.validation import TestSuiteReturnType - - X_train = training_df.drop(columns=[target_column]) - X_test = testing_df.drop(columns=[target_column]) - training_prediction_proba = predictor.predict_proba(X_train)[:, 1] - testing_prediction_proba = predictor.predict_proba(X_test)[:, 1] - - if predict_proba_train is not None: - training_prediction_proba = predict_proba_train - - if predict_proba_test is not None: - testing_prediction_proba = predict_proba_test - - train_predictions = np.array(training_prediction_proba) - test_predictions = np.array(testing_prediction_proba) - - light_red = "#FF8A80" # Light Red - darker_blue = "#1565C0" # Darker Blue - sns.set_palette([darker_blue, light_red]) - - _, ax = plt.subplots(figsize=(8, 6)) - - sns.kdeplot(train_predictions, color=light_red, label="Train Predictions", fill=True) - sns.kdeplot(test_predictions, color=darker_blue, label="Test Predictions", fill=True) - - # Plot vertical lines for means using the specified colors - ax.axvline( # pyright: ignore[reportAttributeAccessIssue] - np.mean(train_predictions), # pyright: ignore[reportArgumentType] - color=light_red, - linestyle="--", - label="Train Mean", - ) - ax.axvline( # pyright: ignore[reportAttributeAccessIssue] - np.mean(test_predictions), # pyright: ignore[reportArgumentType] - color=darker_blue, - linestyle="--", - label="Test Mean", - ) - - plt.xlabel("Predictions") - plt.ylabel("Density") - plt.title("Prediction Drift Plot (Kolmogorov-Smirnov drift score)") - plt.legend() - plt.grid(True) - path = "Prediction_drift.png" - - # Calculate and print drift score - drift_score = ks_score(train_predictions, test_predictions) - - # Set text position at the top - text_x = 0.5 - text_y = 0.95 - if drift_score < 0.1: - score_color = "green" - elif 0.1 <= drift_score <= 0.2: - score_color = "orange" - else: - score_color = "red" - - plt.text( - text_x, - text_y, - f"Drift score = {drift_score:.2f}", - ha="center", - va="top", - color=score_color, - transform=ax.transAxes, # pyright: ignore[reportAttributeAccessIssue] - ) - - plt.savefig(path, bbox_inches="tight") - plt.close() - - return TestSuiteReturnType( - metrics={}, properties={"_prediction_drift_score": drift_score}, tables=[], attachments=[path] - ) - - -def label_drift( - training_df: DataFrame, - testing_df: DataFrame, - target_column: str, - predictor: Any, - predict_proba_train: ArrayLike | None, - predict_proba_test: ArrayLike | None, - internal_parameters: Dict[str, Any] = {}, -) -> TestSuiteReturnType: - from vectice.models.validation import TestSuiteReturnType - - train_labels = np.array(training_df[target_column]) - test_labels = np.array(testing_df[target_column]) - - light_red = "#FF8A80" # Light Red - darker_blue = "#1565C0" # Darker Blue - sns.set_palette([darker_blue, light_red]) - - _, ax = plt.subplots(figsize=(8, 6)) - - bar_width = 0.35 - index = np.arange(2) - - train_counts = [np.sum(train_labels == 0) / len(train_labels), np.sum(train_labels == 1) / len(train_labels)] - test_counts = [np.sum(test_labels == 0) / len(test_labels), np.sum(test_labels == 1) / len(test_labels)] - - train_bar = ax.bar( # pyright: ignore[reportAttributeAccessIssue] - index, train_counts, bar_width, label="Train Labels" - ) - test_bar = ax.bar( # pyright: ignore[reportAttributeAccessIssue] - index + bar_width, test_counts, bar_width, label="Test Labels" - ) - - ax.set_xlabel("Labels") # pyright: 
ignore[reportAttributeAccessIssue] - ax.set_ylabel("Frequency") # pyright: ignore[reportAttributeAccessIssue] - ax.set_title("Label Drift Plot (Cramer's V drift score)") # pyright: ignore[reportAttributeAccessIssue] - ax.set_xticks(index + bar_width / 2) # pyright: ignore[reportAttributeAccessIssue] - ax.set_xticklabels(["0", "1"]) # pyright: ignore[reportAttributeAccessIssue] - ax.legend() # pyright: ignore[reportAttributeAccessIssue] - - def autolabel(bars: BarContainer): - """Attach a text label above each bar in *bars*, displaying its height.""" - for bar in bars: - height = bar.get_height() - ax.annotate( # pyright: ignore[reportAttributeAccessIssue] - f"{height:.2f}", - xy=(bar.get_x() + bar.get_width() / 2, height), - xytext=(0, 3), - textcoords="offset points", - ha="center", - va="bottom", - ) - - autolabel(train_bar) - autolabel(test_bar) - - drift_score = cramers_v_score(train_labels, test_labels) - if drift_score < 0.1: - score_color = "green" - elif 0.1 <= drift_score <= 0.2: - score_color = "orange" - else: - score_color = "red" - - ax.text( # pyright: ignore[reportAttributeAccessIssue] - 0.5, - 0.95, - f"Drift score = {drift_score:.2f}", - ha="center", - va="top", - color=score_color, - transform=ax.transAxes, # pyright: ignore[reportAttributeAccessIssue] - ) - - plt.tight_layout() - path = "Label_drift.png" - plt.savefig(path, bbox_inches="tight") - plt.close() - - return TestSuiteReturnType( - metrics={}, properties={"_label_drift_score": drift_score}, tables=[], attachments=[path] - ) - - -def plot_correlation_matrix( - training_df: DataFrame, - testing_df: DataFrame, - target_column: str, - predictor: Any, - predict_proba_train: ArrayLike | None, - predict_proba_test: ArrayLike | None, - internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, -) -> TestSuiteReturnType: - from vectice.models.validation import TestSuiteReturnType - - subset_columns = internal_parameters.get( - "subset_columns", [target_column] + [col for col in training_df.columns[:10] if col != "TARGET"] - ) - cmap = internal_parameters.get("cmap", "Blues") - - # Select subset of columns - training_df = training_df[subset_columns] - - # Calculate the correlation matrix - corr_matrix = training_df.corr() - - # Plot the correlation matrix - plt.figure(figsize=(10, 8)) - sns.heatmap(corr_matrix, annot=True, cmap=cmap, fmt=".2f", annot_kws={"fontsize": 12}, cbar=True) - plt.title("Correlation Matrix") - - # Save the plot - file_path = "Correlation_matrix_plot.png" - plt.savefig(file_path, bbox_inches="tight") - plt.close() - - return TestSuiteReturnType( - metrics={}, - properties={}, - tables=[], - attachments=[file_path], - ) - - -# custom test which can be used for dataset validation -def test_dataset_split( - training_df: DataFrame, - testing_df: DataFrame, - target_column: str, - predictor: Any, - predict_proba_train: ArrayLike | None, - predict_proba_test: ArrayLike | None, - internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, -) -> TestSuiteReturnType: - from vectice import Table - from vectice.models.validation import TestSuiteReturnType - - total_df = len(training_df) + len(testing_df) - - # Create a DataFrame with the results - datasplit_df = pd.DataFrame( - { - "Dataset": ["Train", "Test", "Total"], - "Size": [len(training_df), len(testing_df), total_df], - "Percentage": [ - (len(training_df) / total_df * 100), - (len(testing_df) / total_df * 100), - 100, - ], - } - ) - - table = Table(datasplit_df) - - return TestSuiteReturnType(metrics={}, 
properties={}, tables=[table], attachments=[]) - - -# custom test which can be used for dataset validation -def iqr_and_outliers( - training_df: DataFrame, - testing_df: DataFrame, - target_column: str, - predictor: Any, - predict_proba_train: ArrayLike | None, - predict_proba_test: ArrayLike | None, - internal_parameters: Dict[str, Any] = {"subset_columns": None, "cmap": "Blues"}, -) -> TestSuiteReturnType | None: - from vectice.models.validation import TestSuiteReturnType - - dataset = training_df - - files = [] - # disable plots showing - if internal_parameters.get("subset_columns") is not None: - columns = internal_parameters.get("subset_columns") - else: - columns = dataset.select_dtypes(include=[np.number]).columns[:10] - plt.ioff() - for column in columns: # type: ignore - file_name = f"iqr_and_outliers_{column}.png" - - temp_file_path = file_name - - Q1 = dataset[column].quantile(0.25) - Q3 = dataset[column].quantile(0.75) - IQR = Q3 - Q1 - lower_bound = Q1 - 1.5 * IQR - upper_bound = Q3 + 1.5 * IQR - - plt.figure(figsize=(10, 6)) - plt.hist(dataset[column], bins=20, edgecolor="k", alpha=0.7) - plt.axvline(Q1, color="r", linestyle="--", label=f"Q1 (25th percentile): {Q1:.2f}") - plt.axvline(Q3, color="b", linestyle="--", label=f"Q3 (75th percentile): {Q3:.2f}") - plt.axvline( - dataset[column].median(), - color="g", - linestyle="-", - label=f"Median: {dataset[column].median():.2f}", - ) - plt.fill_betweenx([0, plt.ylim()[1]], Q1, Q3, color="gray", alpha=0.3, label=f"IQR: {IQR:.2f}") - - # Highlight outliers - outliers = dataset[(dataset[column] < lower_bound) | (dataset[column] > upper_bound)][column] - plt.scatter(outliers, [0] * len(outliers), color="red", label="Outliers", zorder=5) - - plt.title(f"Histogram with IQR and Outliers for {column}") - plt.xlabel(column) - plt.ylabel("Frequency") - plt.legend() - plt.savefig(temp_file_path, bbox_inches="tight") - files.append(temp_file_path) - - plt.ion() - return TestSuiteReturnType( - metrics={}, - properties={}, - tables=[], - attachments=files, - ) \ No newline at end of file diff --git a/Validation/vectice_wrappers.py b/Validation/vectice_wrappers.py new file mode 100644 index 0000000..503fbc7 --- /dev/null +++ b/Validation/vectice_wrappers.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +import inspect + +from typing import Any, Dict +from vectice.models.validation import TestSuiteReturnType + + +## You just pass your function as an argument +def Vectice_wrapper_function( + module: callable, + internal_functions_param: Dict[str, Any], +) -> TestSuiteReturnType: + + # Inspect the signature of the internal function + signature = inspect.signature(module) + + # Validate that all required parameters are provided + for param_name, param in signature.parameters.items(): + if param.default == inspect.Parameter.empty and param_name not in internal_functions_param: + raise ValueError(f"Missing required parameter: {param_name}") + + # Filter out any extra parameters not in the signature + filtered_params = {param_name: internal_functions_param[param_name] for param_name in signature.parameters if param_name in internal_functions_param} + + # Run the provided callable with filtered parameters + result = module(**filtered_params) + + # Helper function to extract paths + def extract_paths(obj): + paths = [] + if isinstance(obj, dict): + for key, value in obj.items(): + paths.extend(extract_paths(value)) + elif isinstance(obj, list): + for item in obj: + paths.extend(extract_paths(item)) + elif isinstance(obj, str): + 
paths.append(obj)
+        elif hasattr(obj, 'attachments'):
+            paths.extend(extract_paths(obj.attachments))
+        return paths
+
+    # Extract attachment file paths from whatever the wrapped function returned
+    extracted_paths = extract_paths(result)
+
+    # Collect them into the keyword arguments TestSuiteReturnType expects
+    output_files = {
+        "metrics": {},
+        "properties": {},
+        "tables": [],
+        "attachments": extracted_paths,
+    }
+
+    # Return in the expected format
+    return TestSuiteReturnType(**output_files)
+
+
+def Vectice_wrapper(
+    output_files: Dict[str, Any] = {"paths": None, "dataframes": None, "metrics": None, "properties": None},
+) -> TestSuiteReturnType:
+
+    ####
+    # Paste your validation code here and fill output_files with the produced
+    # plot paths, tables, metrics and properties before returning.
+    ####
+
+    # RETURN IN THE VECTICE EXPECTED FORMAT
+    return TestSuiteReturnType(
+        metrics=output_files["metrics"],
+        properties=output_files["properties"],
+        tables=output_files["dataframes"],
+        attachments=output_files["paths"],
+    )
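
Note: a minimal usage sketch for the wrapper added in Validation/vectice_wrappers.py (not part of the patches above). The helper my_distribution_plot, the CSV path and the column name are hypothetical, and the import assumes the Validation folder is on the Python path.

import matplotlib.pyplot as plt
import pandas as pd

from vectice_wrappers import Vectice_wrapper_function


def my_distribution_plot(df: pd.DataFrame, column: str) -> str:
    # Hypothetical helper: draws a histogram for one column and returns the saved file path.
    path = f"{column}_distribution.png"
    plt.figure(figsize=(8, 6))
    plt.hist(df[column].dropna(), bins=20, edgecolor="k", alpha=0.7)
    plt.title(f"Distribution of {column}")
    plt.savefig(path, bbox_inches="tight")
    plt.close()
    return path


# Vectice_wrapper_function inspects the helper's signature, forwards only the
# matching keyword arguments, and returns a TestSuiteReturnType whose
# attachments list contains the file path returned by the helper.
result = Vectice_wrapper_function(
    module=my_distribution_plot,
    internal_functions_param={"df": pd.read_csv("train.csv"), "column": "AMT_INCOME_TOTAL"},
)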