@@ -39,12 +39,10 @@ def server(zephyr_lora_added_tokens_files: str):  # noqa: F811
 
 
 @pytest.fixture(scope="module")
-def tokenizer_name(model_name: str, zephyr_lora_added_tokens_files: str):  # noqa: F811
-    return (
-        zephyr_lora_added_tokens_files
-        if (model_name == "zephyr-lora2")
-        else model_name
-    )
+def tokenizer_name(model_name: str,
+                   zephyr_lora_added_tokens_files: str):  # noqa: F811
+    return (zephyr_lora_added_tokens_files if
+            (model_name == "zephyr-lora2") else model_name)
 
 
 @pytest_asyncio.fixture
@@ -64,9 +62,8 @@ async def test_tokenize_completions(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     for add_special in [False, True]:
         prompt = "vllm1 This is a test prompt."
@@ -100,34 +97,42 @@ async def test_tokenize_chat(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     for add_generation in [False, True]:
         for add_special in [False, True]:
             conversation = [
-                {"role": "user", "content": "Hi there!"},
-                {"role": "assistant", "content": "Nice to meet you!"},
-                {"role": "user", "content": "Can I ask a question? vllm1"},
+                {
+                    "role": "user",
+                    "content": "Hi there!"
+                },
+                {
+                    "role": "assistant",
+                    "content": "Nice to meet you!"
+                },
+                {
+                    "role": "user",
+                    "content": "Can I ask a question? vllm1"
+                },
             ]
             for continue_final in [False, True]:
                 if add_generation and continue_final:
                     continue
                 if continue_final:
-                    conversation.append(
-                        {"role": "assistant", "content": "Sure,"}
-                    )
+                    conversation.append({
+                        "role": "assistant",
+                        "content": "Sure,"
+                    })
 
                 prompt = tokenizer.apply_chat_template(
                     add_generation_prompt=add_generation,
                     continue_final_message=continue_final,
                     conversation=conversation,
                     tokenize=False,
                 )
-                tokens = tokenizer.encode(
-                    prompt, add_special_tokens=add_special
-                )
+                tokens = tokenizer.encode(prompt,
+                                          add_special_tokens=add_special)
 
                 response = requests.post(
                     server.url_for("tokenize"),
@@ -159,39 +164,41 @@ async def test_tokenize_chat_with_tools(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     for add_generation in [False, True]:
         for add_special in [False, True]:
-            conversation = [
-                {
-                    "role": "user",
-                    "content": "What's the weather like in Paris today?",
-                }
-            ]
-
-            tools = [
-                {
-                    "type": "function",
-                    "function": {
-                        "name": "get_weather",
-                        "parameters": {
-                            "type": "object",
-                            "properties": {"location": {"type": "string"}},
+            conversation = [{
+                "role":
+                "user",
+                "content":
+                "What's the weather like in Paris today?",
+            }]
+
+            tools = [{
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "location": {
+                                "type": "string"
+                            }
                         },
                     },
-                }
-            ]
+                },
+            }]
 
             for continue_final in [False, True]:
                 if add_generation and continue_final:
                     continue
                 if continue_final:
-                    conversation.append(
-                        {"role": "assistant", "content": "Sure,"}
-                    )
+                    conversation.append({
+                        "role": "assistant",
+                        "content": "Sure,"
+                    })
 
                 prompt = tokenizer.apply_chat_template(
                     add_generation_prompt=add_generation,
@@ -200,9 +207,8 @@ async def test_tokenize_chat_with_tools(
                     tools=tools,
                     tokenize=False,
                 )
-                tokens = tokenizer.encode(
-                    prompt, add_special_tokens=add_special
-                )
+                tokens = tokenizer.encode(prompt,
+                                          add_special_tokens=add_special)
 
                 response = requests.post(
                     server.url_for("tokenize"),
@@ -235,14 +241,17 @@ async def test_tokenize_with_return_token_strs(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     prompt = "This is a token_strs test prompt! vllm1"
     response = requests.post(
         server.url_for("tokenize"),
-        json={"prompt": prompt, "model": model_name, "return_token_strs": True},
+        json={
+            "prompt": prompt,
+            "model": model_name,
+            "return_token_strs": True
+        },
     )
     response.raise_for_status()
 
@@ -267,16 +276,18 @@ async def test_detokenize(
     model_name: str,
     tokenizer_name: str,
 ):
-    tokenizer = get_tokenizer(
-        tokenizer_name=tokenizer_name, tokenizer_mode="fast"
-    )
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
 
     prompt = "This is a test prompt. vllm1"
     tokens = tokenizer.encode(prompt, add_special_tokens=False)
 
     response = requests.post(
         server.url_for("detokenize"),
-        json={"model": model_name, "tokens": tokens},
+        json={
+            "model": model_name,
+            "tokens": tokens
+        },
     )
     response.raise_for_status()
 
@@ -326,15 +337,14 @@ async def test_get_tokenizer_info_schema(server: RemoteOpenAIServer):
     }
     for field, expected_type in field_types.items():
         if field in result and result[field] is not None:
-            assert isinstance(result[field], expected_type), (
-                f"{field} should be {expected_type.__name__}"
-            )
+            assert isinstance(
+                result[field],
+                expected_type), (f"{field} should be {expected_type.__name__}")
 
 
 @pytest.mark.asyncio
 async def test_get_tokenizer_info_added_tokens_structure(
-    server: RemoteOpenAIServer,
-):
+        server: RemoteOpenAIServer, ):
     """Test added_tokens_decoder structure if present."""
     response = requests.get(server.url_for("get_tokenizer_info"))
     response.raise_for_status()
@@ -346,33 +356,32 @@ async def test_get_tokenizer_info_added_tokens_structure(
             assert isinstance(token_info, dict), "Token info should be a dict"
             assert "content" in token_info, "Token info should have content"
             assert "special" in token_info, (
-                "Token info should have special flag"
-            )
-            assert isinstance(token_info["special"], bool), (
-                "Special flag should be boolean"
-            )
+                "Token info should have special flag")
+            assert isinstance(token_info["special"],
+                              bool), ("Special flag should be boolean")
 
 
 @pytest.mark.asyncio
 async def test_get_tokenizer_info_consistency_with_tokenize(
-    server: RemoteOpenAIServer,
-):
+        server: RemoteOpenAIServer, ):
     """Test that tokenizer info is consistent with tokenization endpoint."""
     info_response = requests.get(server.url_for("get_tokenizer_info"))
     info_response.raise_for_status()
     info = info_response.json()
     tokenize_response = requests.post(
         server.url_for("tokenize"),
-        json={"model": MODEL_NAME, "prompt": "Hello world!"},
+        json={
+            "model": MODEL_NAME,
+            "prompt": "Hello world!"
+        },
     )
     tokenize_response.raise_for_status()
     tokenize_result = tokenize_response.json()
     info_max_len = info.get("model_max_length")
     tokenize_max_len = tokenize_result.get("max_model_len")
     if info_max_len and tokenize_max_len:
         assert info_max_len >= tokenize_max_len, (
-            "Info max length should be >= tokenize max length"
-        )
+            "Info max length should be >= tokenize max length")
 
 
 @pytest.mark.asyncio
@@ -383,7 +392,6 @@ async def test_get_tokenizer_info_chat_template(server: RemoteOpenAIServer):
     result = response.json()
     chat_template = result.get("chat_template")
     if chat_template:
-        assert isinstance(chat_template, str), (
-            "Chat template should be a string"
-        )
+        assert isinstance(chat_template,
+                          str), ("Chat template should be a string")
         assert chat_template.strip(), "Chat template should not be empty"