@@ -39,12 +39,10 @@ def server(zephyr_lora_added_tokens_files: str):  # noqa: F811
 
 
 @pytest.fixture(scope="module")
-def tokenizer_name(model_name: str, zephyr_lora_added_tokens_files: str):  # noqa: F811
-    return (
-        zephyr_lora_added_tokens_files
-        if (model_name == "zephyr-lora2")
-        else model_name
-    )
+def tokenizer_name(model_name: str,
+                   zephyr_lora_added_tokens_files: str):  # noqa: F811
+    return (zephyr_lora_added_tokens_files if
+            (model_name == "zephyr-lora2") else model_name)
 
 
 @pytest_asyncio.fixture
@@ -64,9 +62,8 @@ async def test_tokenize_completions(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     for add_special in [False, True]:
         prompt = "vllm1 This is a test prompt."
@@ -100,34 +97,42 @@ async def test_tokenize_chat(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     for add_generation in [False, True]:
         for add_special in [False, True]:
             conversation = [
-                {"role": "user", "content": "Hi there!"},
-                {"role": "assistant", "content": "Nice to meet you!"},
-                {"role": "user", "content": "Can I ask a question? vllm1"},
+                {
+                    "role": "user",
+                    "content": "Hi there!"
+                },
+                {
+                    "role": "assistant",
+                    "content": "Nice to meet you!"
+                },
+                {
+                    "role": "user",
+                    "content": "Can I ask a question? vllm1"
+                },
             ]
             for continue_final in [False, True]:
                 if add_generation and continue_final:
                     continue
                 if continue_final:
-                    conversation.append(
-                        {"role": "assistant", "content": "Sure,"}
-                    )
+                    conversation.append({
+                        "role": "assistant",
+                        "content": "Sure,"
+                    })
 
                 prompt = tokenizer.apply_chat_template(
                     add_generation_prompt=add_generation,
                     continue_final_message=continue_final,
                     conversation=conversation,
                     tokenize=False,
                 )
-                tokens = tokenizer.encode(
-                    prompt, add_special_tokens=add_special
-                )
+                tokens = tokenizer.encode(prompt,
+                                          add_special_tokens=add_special)
 
                 response = requests.post(
                     server.url_for("tokenize"),
@@ -159,39 +164,41 @@ async def test_tokenize_chat_with_tools(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     for add_generation in [False, True]:
         for add_special in [False, True]:
-            conversation = [
-                {
-                    "role": "user",
-                    "content": "What's the weather like in Paris today?",
-                }
-            ]
-
-            tools = [
-                {
-                    "type": "function",
-                    "function": {
-                        "name": "get_weather",
-                        "parameters": {
-                            "type": "object",
-                            "properties": {"location": {"type": "string"}},
+            conversation = [{
+                "role":
+                "user",
+                "content":
+                "What's the weather like in Paris today?",
+            }]
+
+            tools = [{
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "location": {
+                                "type": "string"
+                            }
                         },
                     },
-                }
-            ]
+                },
+            }]
 
             for continue_final in [False, True]:
                 if add_generation and continue_final:
                     continue
                 if continue_final:
-                    conversation.append(
-                        {"role": "assistant", "content": "Sure,"}
-                    )
+                    conversation.append({
+                        "role": "assistant",
+                        "content": "Sure,"
+                    })
 
                 prompt = tokenizer.apply_chat_template(
                     add_generation_prompt=add_generation,
@@ -200,9 +207,8 @@ async def test_tokenize_chat_with_tools(
                     tools=tools,
                     tokenize=False,
                 )
-                tokens = tokenizer.encode(
-                    prompt, add_special_tokens=add_special
-                )
+                tokens = tokenizer.encode(prompt,
+                                          add_special_tokens=add_special)
 
                 response = requests.post(
                     server.url_for("tokenize"),
@@ -235,14 +241,17 @@ async def test_tokenize_with_return_token_strs(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     prompt = "This is a token_strs test prompt! vllm1"
     response = requests.post(
         server.url_for("tokenize"),
-        json={"prompt": prompt, "model": model_name, "return_token_strs": True},
+        json={
+            "prompt": prompt,
+            "model": model_name,
+            "return_token_strs": True
+        },
     )
     response.raise_for_status()
 
@@ -267,16 +276,18 @@ async def test_detokenize(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     prompt = "This is a test prompt. vllm1"
     tokens = tokenizer.encode(prompt, add_special_tokens=False)
 
     response = requests.post(
         server.url_for("detokenize"),
-        json={"model": model_name, "tokens": tokens},
+        json={
+            "model": model_name,
+            "tokens": tokens
+        },
     )
     response.raise_for_status()
 
@@ -326,15 +337,14 @@ async def test_get_tokenizer_info_schema(server: RemoteOpenAIServer):
     }
     for field, expected_type in field_types.items():
         if field in result and result[field] is not None:
-            assert isinstance(result[field], expected_type), (
-                f"{field} should be {expected_type.__name__}"
-            )
+            assert isinstance(
+                result[field],
+                expected_type), (f"{field} should be {expected_type.__name__}")
 
 
 @pytest.mark.asyncio
 async def test_get_tokenizer_info_added_tokens_structure(
-    server: RemoteOpenAIServer,
-):
+        server: RemoteOpenAIServer, ):
     """Test added_tokens_decoder structure if present."""
     response = requests.get(server.url_for("get_tokenizer_info"))
     response.raise_for_status()
@@ -346,33 +356,32 @@ async def test_get_tokenizer_info_added_tokens_structure(
             assert isinstance(token_info, dict), "Token info should be a dict"
             assert "content" in token_info, "Token info should have content"
             assert "special" in token_info, (
-                "Token info should have special flag"
-            )
-            assert isinstance(token_info["special"], bool), (
-                "Special flag should be boolean"
-            )
+                "Token info should have special flag")
+            assert isinstance(token_info["special"],
+                              bool), ("Special flag should be boolean")
 
 
 @pytest.mark.asyncio
 async def test_get_tokenizer_info_consistency_with_tokenize(
-    server: RemoteOpenAIServer,
-):
+        server: RemoteOpenAIServer, ):
     """Test that tokenizer info is consistent with tokenization endpoint."""
     info_response = requests.get(server.url_for("get_tokenizer_info"))
     info_response.raise_for_status()
     info = info_response.json()
     tokenize_response = requests.post(
         server.url_for("tokenize"),
-        json={"model": MODEL_NAME, "prompt": "Hello world!"},
+        json={
+            "model": MODEL_NAME,
+            "prompt": "Hello world!"
+        },
     )
     tokenize_response.raise_for_status()
     tokenize_result = tokenize_response.json()
     info_max_len = info.get("model_max_length")
     tokenize_max_len = tokenize_result.get("max_model_len")
     if info_max_len and tokenize_max_len:
         assert info_max_len >= tokenize_max_len, (
-            "Info max length should be >= tokenize max length"
-        )
+            "Info max length should be >= tokenize max length")
 
 
 @pytest.mark.asyncio
@@ -383,7 +392,6 @@ async def test_get_tokenizer_info_chat_template(server: RemoteOpenAIServer):
     result = response.json()
     chat_template = result.get("chat_template")
     if chat_template:
-        assert isinstance(chat_template, str), (
-            "Chat template should be a string"
-        )
+        assert isinstance(chat_template,
+                          str), ("Chat template should be a string")
         assert chat_template.strip(), "Chat template should not be empty"