Commit 34565e6

mgoin authored and jingyu committed

[UX] Rename CUTLASS_MLA_VLLM_V1 to CUTLASS_MLA (vllm-project#21966)

Signed-off-by: mgoin <[email protected]>
Signed-off-by: jingyu <[email protected]>
1 parent 73647d9 commit 34565e6

File tree: 4 files changed (+8 -8 lines changed)

vllm/engine/arg_utils.py
Lines changed: 1 addition & 1 deletion

@@ -1417,7 +1417,7 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
             "PALLAS_VLLM_V1",
             "TRITON_ATTN_VLLM_V1",
             "TRITON_MLA",
-            "CUTLASS_MLA_VLLM_V1",
+            "CUTLASS_MLA",
             "FLASHMLA",
             "FLASHINFER",
             "FLASHINFER_VLLM_V1",

vllm/platforms/cuda.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -162,15 +162,15 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
162162
if cls.is_device_capability(100):
163163
# Blackwell => Force CutlassMLA.
164164
use_cutlass_mla = True
165-
envs.VLLM_ATTENTION_BACKEND = "CUTLASS_MLA_VLLM_V1"
165+
envs.VLLM_ATTENTION_BACKEND = "CUTLASS_MLA"
166166
else:
167167
# Not Blackwell
168168
use_flashmla = True
169169
else:
170170
# Forced case
171171
use_flashmla = (envs.VLLM_ATTENTION_BACKEND == "FLASHMLA")
172172
use_cutlass_mla = (
173-
envs.VLLM_ATTENTION_BACKEND == "CUTLASS_MLA_VLLM_V1")
173+
envs.VLLM_ATTENTION_BACKEND == "CUTLASS_MLA")
174174

175175
from vllm.attention.ops.flashmla import is_flashmla_supported
176176
if use_flashmla and is_flashmla_supported()[0] \
@@ -182,7 +182,7 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
182182
if use_cutlass_mla and cache_config.block_size != 128:
183183
cache_config.block_size = 128
184184
logger.info("Forcing kv cache block size to 128 for "
185-
"CUTLASS_MLA_VLLM_V1 backend.")
185+
"CUTLASS_MLA backend.")
186186

187187
compilation_config = vllm_config.compilation_config
188188
if (envs.VLLM_ALL2ALL_BACKEND == "deepep_high_throughput"
@@ -211,9 +211,9 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
211211
kv_cache_dtype, block_size, use_v1,
212212
use_mla) -> str:
213213
if use_mla:
214-
# TODO(lucas): refactor to be more concise
214+
# TODO(lucas): refactor to be more concise
215215
# we should probably consider factoring out V1 here
216-
if selected_backend == _Backend.CUTLASS_MLA_VLLM_V1:
216+
if selected_backend == _Backend.CUTLASS_MLA:
217217
if use_v1:
218218
logger.info_once("Using Cutlass MLA backend on V1 engine.")
219219
return ("vllm.v1.attention.backends.mla."

vllm/platforms/interface.py
Lines changed: 1 addition & 1 deletion

@@ -53,7 +53,7 @@ class _Backend(enum.Enum):
     TRITON_MLA_VLLM_V1 = enum.auto()
     FLASHMLA_VLLM_V1 = enum.auto()
     FLASHMLA = enum.auto()  # Supported by V1
-    CUTLASS_MLA_VLLM_V1 = enum.auto()
+    CUTLASS_MLA = enum.auto()
     PALLAS = enum.auto()
     PALLAS_VLLM_V1 = enum.auto()
     IPEX = enum.auto()
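
Since the backend is chosen by string (e.g. via VLLM_ATTENTION_BACKEND) and presumably resolved to an enum member by name, the member spelling has to match the new string exactly. A small sketch of that lookup, using a trimmed-down stand-in for _Backend:

import enum

class _Backend(enum.Enum):
    # Trimmed stand-in for vllm/platforms/interface.py; only the members from
    # the hunk above are shown.
    TRITON_MLA_VLLM_V1 = enum.auto()
    FLASHMLA_VLLM_V1 = enum.auto()
    FLASHMLA = enum.auto()  # Supported by V1
    CUTLASS_MLA = enum.auto()  # renamed from CUTLASS_MLA_VLLM_V1
    PALLAS = enum.auto()
    PALLAS_VLLM_V1 = enum.auto()
    IPEX = enum.auto()

# Standard enum name lookup: the string and the member name must stay in sync,
# which is why the rename touches both the env-var strings and the enum.
selected = _Backend["CUTLASS_MLA"]
assert selected is _Backend.CUTLASS_MLA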

vllm/v1/attention/backends/mla/cutlass_mla.py
Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@ class CutlassMLABackend(MLACommonBackend):

     @staticmethod
     def get_name() -> str:
-        return "CUTLASS_MLA_VLLM_V1"
+        return "CUTLASS_MLA"

     @staticmethod
     def get_impl_cls() -> type["CutlassMLAImpl"]:
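
Finally, the string returned by get_name() is the name the backend is requested under, so after this commit the shorter spelling is used. A hedged usage sketch; the model id is a placeholder and exact behavior depends on your vLLM build and GPU.

import os

# Request the renamed backend before constructing the engine; previously this
# required VLLM_ATTENTION_BACKEND=CUTLASS_MLA_VLLM_V1.
os.environ["VLLM_ATTENTION_BACKEND"] = "CUTLASS_MLA"

from vllm import LLM  # assumes a CUDA build of vLLM on a Blackwell-class GPU

llm = LLM(model="some-mla-model")  # placeholder model id
print(llm.generate("Hello, world!"))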
