We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b2c3dfc commit c9250dfCopy full SHA for c9250df
tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
@@ -70,7 +70,7 @@ def test_mtp_correctness(
70
model=model_name,
71
tensor_parallel_size=1,
72
max_num_seqs=256,
73
- gpu_memory_utilization=0.5,
+ gpu_memory_utilization=0.6,
74
distributed_executor_backend="mp",
75
enable_expert_parallel=True,
76
speculative_config={
0 commit comments