We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9259b2c · commit 77d26be · Copy full SHA for 77d26be
tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
@@ -60,7 +60,7 @@ def test_mtp_correctness(
60
should be the same when using mtp speculative decoding.
61
'''
62
ref_llm = LLM(model=model_name,
63
- gpu_memory_utilization=0.7,
+ gpu_memory_utilization=0.5,
64
max_model_len=256,
65
enforce_eager=True)
66
ref_outputs = ref_llm.chat(test_prompts, sampling_config)
@@ -72,7 +72,7 @@ def test_mtp_correctness(
72
"method": "deepseek_mtp",
73
"num_speculative_tokens": 1,
74
},
75
76
77
78
spec_outputs = spec_llm.chat(test_prompts, sampling_config)
0 commit comments