
Commit 1bb9e35

Manual fixes

Signed-off-by: Harry Mellor <[email protected]>

1 parent: 9e4a464

File tree: 30 files changed (+145, -116 lines)

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@ repos:
   rev: v0.11.7
   hooks:
   - id: ruff
-    args: [--output-format, github, --fix]
+    # args: [--output-format, github, --fix]
   - id: ruff-format
     files: |
       (?x)^(

tests/compile/piecewise/test_full_cudagraph.py

Lines changed: 2 additions & 1 deletion

@@ -159,7 +159,8 @@ def test_full_cudagraph_with_invalid_backend():
         temporary_environ(
             {
                 "VLLM_USE_V1": "1",
-                "VLLM_FLASH_ATTN_VERSION": "2",  # FA2 not supported with full_cuda_graph
+                # FA2 not supported with full_cuda_graph
+                "VLLM_FLASH_ATTN_VERSION": "2",
             }
         ),
         pytest.raises(RuntimeError),

tests/compile/piecewise/test_simple.py

Lines changed: 10 additions & 5 deletions

@@ -100,11 +100,16 @@ def test_simple_piecewise_compile(use_inductor):
 
     with (
         compilation_counter.expect(
-            num_graphs_seen=1,  # one graph for the model
-            num_piecewise_graphs_seen=5,  # 2 * num_layers + 1
-            num_piecewise_capturable_graphs_seen=3,  # 1 + num_layers
-            num_backend_compilations=3,  # num_piecewise_capturable_graphs_seen
-            num_cudagraph_captured=6,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+            # one graph for the model
+            num_graphs_seen=1,
+            # 2 * num_layers + 1
+            num_piecewise_graphs_seen=5,
+            # 1 + num_layers
+            num_piecewise_capturable_graphs_seen=3,
+            # num_piecewise_capturable_graphs_seen
+            num_backend_compilations=3,
+            # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+            num_cudagraph_captured=6,
         ),
         set_forward_context({}, vllm_config=vllm_config),
     ):
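
A note on the pattern, since it repeats through the rest of the commit: ruff-format will not reflow a trailing comment, so any line it pushes past the length limit keeps failing the line-length check. Moving the comment onto its own line above the value it annotates fixes both. A minimal runnable sketch with a hypothetical stand-in for compilation_counter.expect:

def expect(**counts):
    # stand-in for vLLM's compilation_counter.expect, for illustration only
    return counts

expected = expect(
    # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
    num_cudagraph_captured=6,
)
assert expected["num_cudagraph_captured"] == 6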

tests/compile/piecewise/test_toy_llama.py

Lines changed: 17 additions & 18 deletions

@@ -361,11 +361,14 @@ def test_toy_llama(use_inductor: bool):
     kwargs = {"num_eager_compiles": 1, "num_inductor_compiles": 0}
 
     with compilation_counter.expect(
-        num_graphs_seen=1,  # one graph for the model
+        # one graph for the model
+        num_graphs_seen=1,
         num_piecewise_graphs_seen=1,
         num_piecewise_capturable_graphs_seen=1,
-        num_backend_compilations=1,  # num_piecewise_capturable_graphs_seen
-        num_cudagraph_captured=2,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        # num_piecewise_capturable_graphs_seen
+        num_backend_compilations=1,
+        # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        num_cudagraph_captured=2,
         **kwargs,
     ):
         outputs.append(
@@ -374,16 +377,16 @@ def test_toy_llama(use_inductor: bool):
         run_model(tractable_config, use_inductor=use_inductor, use_compile=True)
 
     with compilation_counter.expect(
-        num_graphs_seen=1,  # one graph for the model
-        num_piecewise_graphs_seen=2 * llama_config.num_layers + 1,  # 2 * num_layers + 1
-        num_piecewise_capturable_graphs_seen=1
-        + llama_config.num_layers,  # 1 + num_layers
-        num_backend_compilations=1
-        + llama_config.num_layers,  # num_piecewise_capturable_graphs_seen
-        num_cudagraph_captured=2
-        * (
-            1 + llama_config.num_layers
-        ),  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        # one graph for the model
+        num_graphs_seen=1,
+        # 2 * num_layers + 1
+        num_piecewise_graphs_seen=2 * llama_config.num_layers + 1,
+        # 1 + num_layers
+        num_piecewise_capturable_graphs_seen=1 + llama_config.num_layers,
+        # num_piecewise_capturable_graphs_seen
+        num_backend_compilations=1 + llama_config.num_layers,
+        # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        num_cudagraph_captured=2 * (1 + llama_config.num_layers),
     ):
         outputs.append(
             run_model(
@@ -470,11 +473,7 @@ def benchmark():
             # and use it later, because it will look up the name `b` in the
             # enclosing scope, and the value of `b` will always be 256.
             # it is fine here, because we only use the lambda function once.
-            runtime = do_bench(
-                lambda: graphs[b][0](  # noqa
-                    input_ids[:b], positions[:b]
-                )
-            )  # noqa
+            runtime = do_bench(lambda: graphs[b][0](input_ids[:b], positions[:b]))  # noqa
             piecewise_cudagraph_time[b] = runtime
         else:
             runtime = do_bench(lambda: graphs[b][0].replay())  # noqa
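
The benchmark hunk keeps the original warning comment about late binding: a lambda captures the name `b`, not its value at definition time. A self-contained sketch of the caveat (not from the test file):

fns = [lambda: b for b in range(3)]
assert [f() for f in fns] == [2, 2, 2]  # every closure sees the final b

fns = [lambda b=b: b for b in range(3)]  # default argument binds at definition
assert [f() for f in fns] == [0, 1, 2]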

tests/core/test_scheduler.py

Lines changed: 19 additions & 18 deletions

@@ -1109,8 +1109,9 @@ def test_remove_seq_from_computed_blocks_tracker():
     scheduler._add_seq_group_to_swapped(seq_group)
 
     scheduler._schedule_swapped(budget, curr_loras)
-    seq_id_to_num_tokens_computed = scheduler.block_manager._computed_blocks_tracker._seq_id_to_num_tokens_computed.get(
-        1
+    computed_blocks_tracker = scheduler.block_manager._computed_blocks_tracker
+    seq_id_to_num_tokens_computed = (
+        computed_blocks_tracker._seq_id_to_num_tokens_computed.get(1)
     )
     assert seq_id_to_num_tokens_computed is None
 
@@ -1139,15 +1140,15 @@ def test_remove_seq_from_computed_blocks_tracker():
     scheduler.add_seq_group(seq_group)
 
     scheduler._schedule_prefills(budget, curr_loras)
-    seq_id_to_num_tokens_computed = scheduler.block_manager._computed_blocks_tracker._seq_id_to_num_tokens_computed.get(
-        1
+    seq_id_to_num_tokens_computed = (
+        computed_blocks_tracker._seq_id_to_num_tokens_computed.get(1)
     )
     assert seq_id_to_num_tokens_computed is None
 
     # Priority preemption schedule
     scheduler._schedule_priority_preemption(budget)
-    seq_id_to_num_tokens_computed = scheduler.block_manager._computed_blocks_tracker._seq_id_to_num_tokens_computed.get(
-        1
+    seq_id_to_num_tokens_computed = (
+        computed_blocks_tracker._seq_id_to_num_tokens_computed.get(1)
     )
     assert seq_id_to_num_tokens_computed is None
 
@@ -1187,8 +1188,8 @@ def test_remove_seq_from_computed_blocks_tracker():
     scheduler.add_seq_group(seq_group)
 
     scheduler._schedule_default()
-    seq_id_to_num_tokens_computed = scheduler.block_manager._computed_blocks_tracker._seq_id_to_num_tokens_computed.get(
-        1
+    seq_id_to_num_tokens_computed = (
+        computed_blocks_tracker._seq_id_to_num_tokens_computed.get(1)
     )
     assert seq_id_to_num_tokens_computed is None
 
@@ -1223,8 +1224,8 @@ def test_remove_seq_from_computed_blocks_tracker():
     scheduler.add_seq_group(seq_group)
 
     scheduler._schedule_default()
-    seq_id_to_num_tokens_computed = scheduler.block_manager._computed_blocks_tracker._seq_id_to_num_tokens_computed.get(
-        2
+    seq_id_to_num_tokens_computed = (
+        computed_blocks_tracker._seq_id_to_num_tokens_computed.get(2)
    )
     assert seq_id_to_num_tokens_computed is None
 
@@ -1261,8 +1262,8 @@ def test_remove_seq_from_computed_blocks_tracker():
     scheduler.add_seq_group(seq_group)
 
     scheduler._schedule_default()
-    seq_id_to_num_tokens_computed = scheduler.block_manager._computed_blocks_tracker._seq_id_to_num_tokens_computed.get(
-        1
+    seq_id_to_num_tokens_computed = (
+        computed_blocks_tracker._seq_id_to_num_tokens_computed.get(1)
     )
     assert seq_id_to_num_tokens_computed is None
 
@@ -1289,8 +1290,8 @@ def test_remove_seq_from_computed_blocks_tracker():
     for _, seq_group in seq_and_seq_groups:
         scheduler.add_seq_group(seq_group)
     scheduler._schedule_default()
-    seq_id_to_num_tokens_computed = scheduler.block_manager._computed_blocks_tracker._seq_id_to_num_tokens_computed.get(
-        0
+    seq_id_to_num_tokens_computed = (
+        computed_blocks_tracker._seq_id_to_num_tokens_computed.get(0)
     )
     assert seq_id_to_num_tokens_computed is None
 
@@ -1323,8 +1324,8 @@ def test_remove_seq_from_computed_blocks_tracker():
     scheduler.add_seq_group(seq_group)
 
     scheduler._schedule_default()
-    seq_id_to_num_tokens_computed = scheduler.block_manager._computed_blocks_tracker._seq_id_to_num_tokens_computed.get(
-        0
+    seq_id_to_num_tokens_computed = (
+        computed_blocks_tracker._seq_id_to_num_tokens_computed.get(0)
     )
     assert seq_id_to_num_tokens_computed is None
 
@@ -1357,7 +1358,7 @@ def test_remove_seq_from_computed_blocks_tracker():
     scheduler.add_seq_group(seq_group)
 
     scheduler._schedule_default()
-    seq_id_to_num_tokens_computed = scheduler.block_manager._computed_blocks_tracker._seq_id_to_num_tokens_computed.get(
-        1
+    seq_id_to_num_tokens_computed = (
+        computed_blocks_tracker._seq_id_to_num_tokens_computed.get(1)
    )
    assert seq_id_to_num_tokens_computed is None
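
The refactor here binds the long attribute chain to a local name in the first hunk, and every later assertion reuses it, wrapping the call in parentheses so the expression can break across lines without backslashes. A self-contained sketch with stand-in classes rather than vLLM's scheduler:

class _Tracker:
    def __init__(self):
        self._seq_id_to_num_tokens_computed = {}

class _BlockManager:
    def __init__(self):
        self._computed_blocks_tracker = _Tracker()

block_manager = _BlockManager()

computed_blocks_tracker = block_manager._computed_blocks_tracker
seq_id_to_num_tokens_computed = (
    computed_blocks_tracker._seq_id_to_num_tokens_computed.get(1)
)
assert seq_id_to_num_tokens_computed is None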

tests/entrypoints/conftest.py

Lines changed: 6 additions & 3 deletions

@@ -67,13 +67,15 @@ def sample_complex_json_schema():
         "type": "object",
         "properties": {
             "score": {
+                # Numeric range
                 "type": "integer",
                 "minimum": 0,
-                "maximum": 100,  # Numeric range
+                "maximum": 100,
             },
             "grade": {
+                # Regex pattern
                 "type": "string",
-                "pattern": "^[A-D]$",  # Regex pattern
+                "pattern": "^[A-D]$",
             },
             "email": {
                 "type": "string",
@@ -82,8 +84,9 @@ def sample_complex_json_schema():
             "tags": {
                 "type": "array",
                 "items": {
+                    # Combining length and pattern restrictions
                     "type": "string",
-                    "pattern": "^[a-z]{1,10}$",  # Combining length and pattern restrictions
+                    "pattern": "^[a-z]{1,10}$",
                },
            },
        },

tests/entrypoints/openai/test_audio.py

Lines changed: 1 addition & 1 deletion

@@ -143,7 +143,7 @@ async def test_single_chat_session_audio_base64encoded(
                 {
                     "type": "audio_url",
                     "audio_url": {
-                        "url": f"data:audio/wav;base64,{base64_encoded_audio[audio_url]}"
+                        "url": f"data:audio/wav;base64,{base64_encoded_audio[audio_url]}"  # noqa: E501
                     },
                 },
                 {"type": "text", "text": "What's happening in this audio?"},

tests/entrypoints/openai/test_chat.py

Lines changed: 14 additions & 8 deletions

@@ -38,8 +38,8 @@ def server(
     request,
     monkeypatch_module,
     zephyr_lora_files,  # noqa: F811
-    zephyr_lora_added_tokens_files,
-):  # noqa: F811
+    zephyr_lora_added_tokens_files,  # noqa: F811
+):
     use_v1 = request.param
     monkeypatch_module.setenv("VLLM_USE_V1", "1" if use_v1 else "0")
 
@@ -713,12 +713,14 @@ async def test_required_tool_use(
                     "properties": {
                         "city": {
                             "type": "string",
-                            "description": "The city to find the weather for, e.g. 'Vienna'",
+                            "description": "The city to find the weather for, e.g. "
+                            "'Vienna'",
                             "default": "Vienna",
                         },
                         "country": {
                             "type": "string",
-                            "description": "The country that the city is in, e.g. 'Austria'",
+                            "description": "The country that the city is in, e.g. "
+                            "'Austria'",
                         },
                         "unit": {
                             "type": "string",
@@ -740,16 +742,19 @@ async def test_required_tool_use(
                     "properties": {
                         "city": {
                             "type": "string",
-                            "description": "The city to get the forecast for, e.g. 'Vienna'",
+                            "description": "The city to get the forecast for, e.g. "
+                            "'Vienna'",
                             "default": "Vienna",
                         },
                         "country": {
                             "type": "string",
-                            "description": "The country that the city is in, e.g. 'Austria'",
+                            "description": "The country that the city is in, e.g. "
+                            "'Austria'",
                         },
                         "days": {
                             "type": "integer",
-                            "description": "Number of days to get the forecast for (1-7)",
+                            "description": "Number of days to get the forecast for "
+                            "(1-7)",
                         },
                         "unit": {
                             "type": "string",
@@ -957,7 +962,8 @@ async def test_complex_message_content(client: openai.AsyncOpenAI):
             "content": [
                 {
                     "type": "text",
-                    "text": "what is 1+1? please provide the result without any other text.",
+                    "text": "what is 1+1? please provide the result without any "
+                    "other text.",
                }
            ],
        }
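
The long description and text values above are split with implicit concatenation: adjacent string literals are joined at compile time, so no "+" or backslash is needed, and the surrounding brackets allow the line break. A quick check:

description = (
    "The city to find the weather for, e.g. "
    "'Vienna'"
)
assert description == "The city to find the weather for, e.g. 'Vienna'"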

tests/entrypoints/openai/test_chat_template.py

Lines changed: 2 additions & 3 deletions

@@ -75,9 +75,8 @@ def test_load_chat_template():
     # Hard coded value for template_chatml.jinja
     assert (
         template_content
-        == """{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %}
-{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}"""
-    )  # noqa: E501
+        == "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}"  # noqa: E501
+    )
 
 
 def test_no_load_chat_template_filelike():
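
Two things happen in this hunk: the triple-quoted literal collapses into a single long string, and the # noqa: E501 moves from the closing parenthesis onto the long line itself. The move is what makes the suppression take effect, since noqa applies only to the line that carries it; on the ")" line it silenced nothing. A minimal sketch with a shortened stand-in template:

template = "{% for m in messages %}{{ m['content'] }}{% endfor %}"  # noqa: E501
assert template.endswith("{% endfor %}")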

tests/entrypoints/openai/test_chat_with_tool_reasoning.py

Lines changed: 6 additions & 3 deletions

@@ -45,11 +45,13 @@ async def client(server):
             "properties": {
                 "city": {
                     "type": "string",
-                    "description": "The city to find the weather for, e.g. 'San Francisco'",
+                    "description": "The city to find the weather for, e.g. "
+                    "'San Francisco'",
                 },
                 "state": {
                     "type": "string",
-                    "description": "the two-letter abbreviation for the state that the city is"
+                    "description": "the two-letter abbreviation for the state that "
+                    "the city is"
                     " in, e.g. 'CA' which would mean 'California'",
                 },
                 "unit": {
@@ -69,7 +71,8 @@ async def client(server):
     {"role": "assistant", "content": "I'm doing well! How can I help you?"},
     {
         "role": "user",
-        "content": "Can you tell me what the temperate will be in Dallas, in fahrenheit?",
+        "content": "Can you tell me what the temperate will be in Dallas, in "
+        "fahrenheit?",
    },
 ]
 