Skip to content

Commit 63772fb

Browse files
dtrifiro authored and jingyu committed
[CI/Build] get rid of unused VLLM_FA_CMAKE_GPU_ARCHES (vllm-project#21599)
Signed-off-by: Daniele Trifirò <[email protected]> Signed-off-by: jingyu <[email protected]>
1 parent 3c20b8c commit 63772fb

File tree

5 files changed

+2
-11
lines changed

5 files changed

+2
-11
lines changed

.buildkite/scripts/hardware_ci/run-gh200-test.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ DOCKER_BUILDKIT=1 docker build . \
1616
--build-arg max_jobs=66 \
1717
--build-arg nvcc_threads=2 \
1818
--build-arg RUN_WHEEL_CHECK=false \
19-
--build-arg torch_cuda_arch_list="9.0+PTX" \
20-
--build-arg vllm_fa_cmake_gpu_arches="90-real"
19+
--build-arg torch_cuda_arch_list="9.0+PTX"
2120

2221
# Setup cleanup
2322
remove_docker_container() { docker rm -f gh200-test || true; }

.github/workflows/scripts/build.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ $python_executable -m pip install -r requirements/build.txt -r requirements/cuda
1515
export MAX_JOBS=1
1616
# Make sure release wheels are built for the following architectures
1717
export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
18-
export VLLM_FA_CMAKE_GPU_ARCHES="80-real;90-real"
1918

2019
bash tools/check_repo.sh
2120

docker/Dockerfile

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,9 +164,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
164164
# see https://github.com/pytorch/pytorch/pull/123243
165165
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
166166
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
167-
# Override the arch list for flash-attn to reduce the binary size
168-
ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
169-
ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
170167
#################### BASE BUILD IMAGE ####################
171168

172169
#################### WHEEL BUILD IMAGE ####################

docker/Dockerfile.nightly_torch

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,6 @@ RUN cat torch_build_versions.txt
114114
# explicitly set the list to avoid issues with torch 2.2
115115
# see https://github.com/pytorch/pytorch/pull/123243
116116

117-
# Override the arch list for flash-attn to reduce the binary size
118-
ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
119-
ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
120117
#################### BASE BUILD IMAGE ####################
121118

122119
#################### WHEEL BUILD IMAGE ####################

docs/deployment/docker.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,7 @@ of PyTorch Nightly and should be considered **experimental**. Using the flag `--
106106
-t vllm/vllm-gh200-openai:latest \
107107
--build-arg max_jobs=66 \
108108
--build-arg nvcc_threads=2 \
109-
--build-arg torch_cuda_arch_list="9.0 10.0+PTX" \
110-
--build-arg vllm_fa_cmake_gpu_arches="90-real"
109+
--build-arg torch_cuda_arch_list="9.0 10.0+PTX"
111110
```
112111

113112
!!! note

0 commit comments

Comments (0)