feat: add MTP unit test case

赵江江 · 赵江江 · commit 77d26be3ebf9 · 2025-08-04T08:52:07.000+08:00
Signed-off-by: 赵江江 <zhaojiangjiang1@h-partners.com>
diff --git a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
@@ -60,7 +60,7 @@ def test_mtp_correctness(
     should be the same when using mtp speculative decoding.
     '''
     ref_llm = LLM(model=model_name,
-                  gpu_memory_utilization=0.7,
+                  gpu_memory_utilization=0.5,
                   max_model_len=256,
                   enforce_eager=True)
     ref_outputs = ref_llm.chat(test_prompts, sampling_config)
@@ -72,7 +72,7 @@ def test_mtp_correctness(
                        "method": "deepseek_mtp",
                        "num_speculative_tokens": 1,
                    },
-                   gpu_memory_utilization=0.7,
+                   gpu_memory_utilization=0.5,
                    max_model_len=256,
                    enforce_eager=True)
     spec_outputs = spec_llm.chat(test_prompts, sampling_config)