Commit 2adc763

🚧 run pre-commit
1 parent 9452d00 commit 2adc763
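
This commit re-applies the repository's pre-commit hooks to the files touched by the tokenizer-info endpoint work: line re-wrapping in the tests and server code, plus removal of an unused import. A minimal sketch of reproducing such a pass locally, assuming the pre-commit package is installed and the repo's .pre-commit-config.yaml defines the hooks:

# Sketch only: assumes `pre-commit` is installed and configured for this repo.
import subprocess

subprocess.run(["pre-commit", "run", "--all-files"], check=False)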

5 files changed: +325 −465 lines


tests/entrypoints/openai/test_tokenization.py

Lines changed: 81 additions & 73 deletions
@@ -39,12 +39,10 @@ def server(zephyr_lora_added_tokens_files: str):  # noqa: F811
 
 
 @pytest.fixture(scope="module")
-def tokenizer_name(model_name: str, zephyr_lora_added_tokens_files: str):  # noqa: F811
-    return (
-        zephyr_lora_added_tokens_files
-        if (model_name == "zephyr-lora2")
-        else model_name
-    )
+def tokenizer_name(model_name: str,
+                   zephyr_lora_added_tokens_files: str):  # noqa: F811
+    return (zephyr_lora_added_tokens_files if
+            (model_name == "zephyr-lora2") else model_name)
 
 
 @pytest_asyncio.fixture
@@ -64,9 +62,8 @@ async def test_tokenize_completions(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     for add_special in [False, True]:
         prompt = "vllm1 This is a test prompt."
@@ -100,34 +97,42 @@ async def test_tokenize_chat(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     for add_generation in [False, True]:
         for add_special in [False, True]:
             conversation = [
-                {"role": "user", "content": "Hi there!"},
-                {"role": "assistant", "content": "Nice to meet you!"},
-                {"role": "user", "content": "Can I ask a question? vllm1"},
+                {
+                    "role": "user",
+                    "content": "Hi there!"
+                },
+                {
+                    "role": "assistant",
+                    "content": "Nice to meet you!"
+                },
+                {
+                    "role": "user",
+                    "content": "Can I ask a question? vllm1"
+                },
             ]
             for continue_final in [False, True]:
                 if add_generation and continue_final:
                     continue
                 if continue_final:
-                    conversation.append(
-                        {"role": "assistant", "content": "Sure,"}
-                    )
+                    conversation.append({
+                        "role": "assistant",
+                        "content": "Sure,"
+                    })
 
                 prompt = tokenizer.apply_chat_template(
                     add_generation_prompt=add_generation,
                     continue_final_message=continue_final,
                     conversation=conversation,
                     tokenize=False,
                 )
-                tokens = tokenizer.encode(
-                    prompt, add_special_tokens=add_special
-                )
+                tokens = tokenizer.encode(prompt,
+                                          add_special_tokens=add_special)
 
                 response = requests.post(
                     server.url_for("tokenize"),
@@ -159,39 +164,41 @@ async def test_tokenize_chat_with_tools(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     for add_generation in [False, True]:
         for add_special in [False, True]:
-            conversation = [
-                {
-                    "role": "user",
-                    "content": "What's the weather like in Paris today?",
-                }
-            ]
-
-            tools = [
-                {
-                    "type": "function",
-                    "function": {
-                        "name": "get_weather",
-                        "parameters": {
-                            "type": "object",
-                            "properties": {"location": {"type": "string"}},
+            conversation = [{
+                "role":
+                "user",
+                "content":
+                "What's the weather like in Paris today?",
+            }]
+
+            tools = [{
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "location": {
+                                "type": "string"
+                            }
+                        }
                         },
                     },
-                }
-            ]
+                },
+            }]
 
             for continue_final in [False, True]:
                 if add_generation and continue_final:
                     continue
                 if continue_final:
-                    conversation.append(
-                        {"role": "assistant", "content": "Sure,"}
-                    )
+                    conversation.append({
+                        "role": "assistant",
+                        "content": "Sure,"
+                    })
 
                 prompt = tokenizer.apply_chat_template(
                     add_generation_prompt=add_generation,
@@ -200,9 +207,8 @@ async def test_tokenize_chat_with_tools(
                     tools=tools,
                     tokenize=False,
                 )
-                tokens = tokenizer.encode(
-                    prompt, add_special_tokens=add_special
-                )
+                tokens = tokenizer.encode(prompt,
+                                          add_special_tokens=add_special)
 
                 response = requests.post(
                     server.url_for("tokenize"),
@@ -235,14 +241,17 @@ async def test_tokenize_with_return_token_strs(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     prompt = "This is a token_strs test prompt! vllm1"
     response = requests.post(
         server.url_for("tokenize"),
-        json={"prompt": prompt, "model": model_name, "return_token_strs": True},
+        json={
+            "prompt": prompt,
+            "model": model_name,
+            "return_token_strs": True
+        },
     )
     response.raise_for_status()
 
@@ -267,16 +276,18 @@ async def test_detokenize(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     prompt = "This is a test prompt. vllm1"
     tokens = tokenizer.encode(prompt, add_special_tokens=False)
 
     response = requests.post(
         server.url_for("detokenize"),
-        json={"model": model_name, "tokens": tokens},
+        json={
+            "model": model_name,
+            "tokens": tokens
+        },
     )
     response.raise_for_status()
 
@@ -326,15 +337,14 @@ async def test_get_tokenizer_info_schema(server: RemoteOpenAIServer):
     }
     for field, expected_type in field_types.items():
         if field in result and result[field] is not None:
-            assert isinstance(result[field], expected_type), (
-                f"{field} should be {expected_type.__name__}"
-            )
+            assert isinstance(
+                result[field],
+                expected_type), (f"{field} should be {expected_type.__name__}")
 
 
 @pytest.mark.asyncio
 async def test_get_tokenizer_info_added_tokens_structure(
-    server: RemoteOpenAIServer,
-):
+        server: RemoteOpenAIServer, ):
     """Test added_tokens_decoder structure if present."""
     response = requests.get(server.url_for("get_tokenizer_info"))
     response.raise_for_status()
@@ -346,33 +356,32 @@ async def test_get_tokenizer_info_added_tokens_structure(
             assert isinstance(token_info, dict), "Token info should be a dict"
             assert "content" in token_info, "Token info should have content"
             assert "special" in token_info, (
-                "Token info should have special flag"
-            )
-            assert isinstance(token_info["special"], bool), (
-                "Special flag should be boolean"
-            )
+                "Token info should have special flag")
+            assert isinstance(token_info["special"],
+                              bool), ("Special flag should be boolean")
 
 
 @pytest.mark.asyncio
 async def test_get_tokenizer_info_consistency_with_tokenize(
-    server: RemoteOpenAIServer,
-):
+        server: RemoteOpenAIServer, ):
     """Test that tokenizer info is consistent with tokenization endpoint."""
     info_response = requests.get(server.url_for("get_tokenizer_info"))
     info_response.raise_for_status()
     info = info_response.json()
     tokenize_response = requests.post(
         server.url_for("tokenize"),
-        json={"model": MODEL_NAME, "prompt": "Hello world!"},
+        json={
+            "model": MODEL_NAME,
+            "prompt": "Hello world!"
+        },
     )
     tokenize_response.raise_for_status()
     tokenize_result = tokenize_response.json()
     info_max_len = info.get("model_max_length")
     tokenize_max_len = tokenize_result.get("max_model_len")
     if info_max_len and tokenize_max_len:
         assert info_max_len >= tokenize_max_len, (
-            "Info max length should be >= tokenize max length"
-        )
+            "Info max length should be >= tokenize max length")
 
 
 @pytest.mark.asyncio
@@ -383,7 +392,6 @@ async def test_get_tokenizer_info_chat_template(server: RemoteOpenAIServer):
     result = response.json()
     chat_template = result.get("chat_template")
     if chat_template:
-        assert isinstance(chat_template, str), (
-            "Chat template should be a string"
-        )
+        assert isinstance(chat_template,
+                          str), ("Chat template should be a string")
         assert chat_template.strip(), "Chat template should not be empty"
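
The reformatted tests above exercise the server's /tokenize and /detokenize HTTP routes. A minimal sketch of the same request shape outside the pytest harness; the base URL and model name are placeholders, and the "tokens" response field is assumed to match vLLM's TokenizeResponse:

# Illustrative only: BASE_URL and MODEL are assumptions, not values from this diff.
import requests

BASE_URL = "http://localhost:8000"  # assumed locally running OpenAI-compatible vLLM server
MODEL = "my-served-model"           # placeholder model name

tok = requests.post(f"{BASE_URL}/tokenize",
                    json={
                        "model": MODEL,
                        "prompt": "This is a test prompt. vllm1",
                        "return_token_strs": True,
                    })
tok.raise_for_status()
token_ids = tok.json()["tokens"]  # assumed field name

# Round-trip the same ids through /detokenize.
detok = requests.post(f"{BASE_URL}/detokenize",
                      json={
                          "model": MODEL,
                          "tokens": token_ids
                      })
detok.raise_for_status()
print(detok.json())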

vllm/entrypoints/openai/api_server.py

Lines changed: 6 additions & 6 deletions
@@ -73,7 +73,6 @@
                                               ResponsesResponse, ScoreRequest,
                                               ScoreResponse, TokenizeRequest,
                                               TokenizeResponse,
-                                              TokenizerInfoResponse,
                                               TranscriptionRequest,
                                               TranscriptionResponse,
                                               TranslationRequest,
@@ -527,15 +526,16 @@ async def detokenize(request: DetokenizeRequest, raw_request: Request):
 def maybe_register_tokenizer_info_endpoint(args):
     """Conditionally register the tokenizer info endpoint if enabled."""
     if getattr(args, 'enable_tokenizer_info_endpoint', False):
+
         @router.get("/tokenizer_info")
         async def get_tokenizer_info(raw_request: Request):
             """Get comprehensive tokenizer information."""
             result = await tokenization(raw_request).get_tokenizer_info()
-            return JSONResponse(
-                content=result.model_dump(),
-                status_code=result.code if isinstance(result, ErrorResponse) else 200)
-
-
+            return JSONResponse(content=result.model_dump(),
+                                status_code=result.code if isinstance(
+                                    result, ErrorResponse) else 200)
+
+
 @router.get("/v1/models")
 async def show_available_models(raw_request: Request):
     handler = models(raw_request)
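
In api_server.py, maybe_register_tokenizer_info_endpoint attaches the /tokenizer_info route only when the corresponding CLI flag is set, so the endpoint stays off by default. A standalone sketch of that opt-in registration pattern on a bare FastAPI router; the handler body here is a stub, not vLLM's actual serving logic:

# Sketch of the conditional-registration pattern; only the route path and the
# attribute name come from the diff above, the rest is illustrative.
from argparse import Namespace

from fastapi import APIRouter, Request
from fastapi.responses import JSONResponse

router = APIRouter()


def maybe_register_tokenizer_info_endpoint(args: Namespace) -> None:
    """Attach /tokenizer_info only when the CLI flag enabled it."""
    if getattr(args, "enable_tokenizer_info_endpoint", False):

        @router.get("/tokenizer_info")
        async def get_tokenizer_info(raw_request: Request):
            # Stub payload; the real handler returns the tokenizer's configuration.
            return JSONResponse(content={"status": "ok"}, status_code=200)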

vllm/entrypoints/openai/cli_args.py

Lines changed: 1 addition & 2 deletions
@@ -300,8 +300,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         action='store_true',
         default=False,
         help="Enable the /tokenizer_info endpoint. May expose chat "
-        "templates and other tokenizer configuration."
-    )
+        "templates and other tokenizer configuration.")
 
     return parser
 
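
The cli_args.py change only re-wraps the help string of the opt-in flag. For reference, the option boils down to a plain store_true argparse argument; the flag spelling below is inferred from the enable_tokenizer_info_endpoint attribute read in api_server.py and is not shown in this hunk:

# Minimal argparse equivalent; the option name is an inference, not part of this diff.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--enable-tokenizer-info-endpoint",
    action='store_true',
    default=False,
    help="Enable the /tokenizer_info endpoint. May expose chat "
    "templates and other tokenizer configuration.")

args = parser.parse_args([])
print(args.enable_tokenizer_info_endpoint)  # False unless the flag is given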
