
Commit 855b7bf

🚧 renamed endpoint from get_tokenizer_info to tokenizer_info and ran pre-commit
1 parent ade7167 commit 855b7bf

5 files changed: +239 -189 lines


tests/entrypoints/openai/test_tokenization.py

Lines changed: 66 additions & 46 deletions
@@ -41,8 +41,8 @@ def server(zephyr_lora_added_tokens_files: str):  # noqa: F811
 
 @pytest.fixture(scope="module")
 def tokenizer_name(model_name: str,
                    zephyr_lora_added_tokens_files: str):  # noqa: F811
-    return zephyr_lora_added_tokens_files if (
-        model_name == "zephyr-lora2") else model_name
+    return (zephyr_lora_added_tokens_files if
+            (model_name == "zephyr-lora2") else model_name)
 
 
 @pytest_asyncio.fixture
@@ -69,12 +69,14 @@ async def test_tokenize_completions(
     prompt = "vllm1 This is a test prompt."
     tokens = tokenizer.encode(prompt, add_special_tokens=add_special)
 
-    response = requests.post(server.url_for("tokenize"),
-                             json={
-                                 "add_special_tokens": add_special,
-                                 "model": model_name,
-                                 "prompt": prompt
-                             })
+    response = requests.post(
+        server.url_for("tokenize"),
+        json={
+            "add_special_tokens": add_special,
+            "model": model_name,
+            "prompt": prompt,
+        },
+    )
     response.raise_for_status()
 
     result = response.json()
@@ -100,16 +102,20 @@ async def test_tokenize_chat(
 
     for add_generation in [False, True]:
         for add_special in [False, True]:
-            conversation = [{
-                "role": "user",
-                "content": "Hi there!"
-            }, {
-                "role": "assistant",
-                "content": "Nice to meet you!"
-            }, {
-                "role": "user",
-                "content": "Can I ask a question? vllm1"
-            }]
+            conversation = [
+                {
+                    "role": "user",
+                    "content": "Hi there!"
+                },
+                {
+                    "role": "assistant",
+                    "content": "Nice to meet you!"
+                },
+                {
+                    "role": "user",
+                    "content": "Can I ask a question? vllm1"
+                },
+            ]
             for continue_final in [False, True]:
                 if add_generation and continue_final:
                     continue
@@ -123,20 +129,21 @@ async def test_tokenize_chat(
                     add_generation_prompt=add_generation,
                     continue_final_message=continue_final,
                     conversation=conversation,
-                    tokenize=False)
+                    tokenize=False,
+                )
                 tokens = tokenizer.encode(prompt,
                                           add_special_tokens=add_special)
 
-                response = requests.post(server.url_for("tokenize"),
-                                         json={
-                                             "add_generation_prompt":
-                                             add_generation,
-                                             "continue_final_message":
-                                             continue_final,
-                                             "add_special_tokens": add_special,
-                                             "messages": conversation,
-                                             "model": model_name
-                                         })
+                response = requests.post(
+                    server.url_for("tokenize"),
+                    json={
+                        "add_generation_prompt": add_generation,
+                        "continue_final_message": continue_final,
+                        "add_special_tokens": add_special,
+                        "messages": conversation,
+                        "model": model_name,
+                    },
+                )
                 response.raise_for_status()
 
                 result = response.json()
@@ -275,11 +282,13 @@ async def test_detokenize(
     prompt = "This is a test prompt. vllm1"
     tokens = tokenizer.encode(prompt, add_special_tokens=False)
 
-    response = requests.post(server.url_for("detokenize"),
-                             json={
-                                 "model": model_name,
-                                 "tokens": tokens
-                             })
+    response = requests.post(
+        server.url_for("detokenize"),
+        json={
+            "model": model_name,
+            "tokens": tokens
+        },
+    )
     response.raise_for_status()
 
     assert response.json() == {"prompt": prompt}
@@ -302,18 +311,18 @@ async def test_get_tokenizer_info_basic(
     result = response.json()
     assert "tokenizer_class" in result
     assert isinstance(result["tokenizer_class"], str)
-    assert result["tokenizer_class"]
+    assert result["tokenizer_class"]
 
 
-@pytest.mark.asyncio
+@pytest.mark.asyncio
 async def test_get_tokenizer_info_schema(server: RemoteOpenAIServer):
     """Test that the response matches expected schema types."""
     response = requests.get(server.url_for("get_tokenizer_info"))
     response.raise_for_status()
     result = response.json()
     field_types = {
         "add_bos_token": bool,
-        "add_prefix_space": bool,
+        "add_prefix_space": bool,
         "clean_up_tokenization_spaces": bool,
         "split_special_tokens": bool,
         "bos_token": str,
@@ -328,11 +337,14 @@ async def test_get_tokenizer_info_schema(server: RemoteOpenAIServer):
     }
     for field, expected_type in field_types.items():
         if field in result and result[field] is not None:
-            assert isinstance(result[field], expected_type), f"{field} should be {expected_type.__name__}"
+            assert isinstance(
+                result[field],
+                expected_type), (f"{field} should be {expected_type.__name__}")
 
 
 @pytest.mark.asyncio
-async def test_get_tokenizer_info_added_tokens_structure(server: RemoteOpenAIServer):
+async def test_get_tokenizer_info_added_tokens_structure(
+        server: RemoteOpenAIServer, ):
     """Test added_tokens_decoder structure if present."""
     response = requests.get(server.url_for("get_tokenizer_info"))
     response.raise_for_status()
@@ -343,26 +355,33 @@ async def test_get_tokenizer_info_added_tokens_structure(server: RemoteOpenAIServer):
             assert isinstance(token_id, str), "Token IDs should be strings"
             assert isinstance(token_info, dict), "Token info should be a dict"
            assert "content" in token_info, "Token info should have content"
-            assert "special" in token_info, "Token info should have special flag"
-            assert isinstance(token_info["special"], bool), "Special flag should be boolean"
+            assert "special" in token_info, (
+                "Token info should have special flag")
+            assert isinstance(token_info["special"],
+                              bool), ("Special flag should be boolean")
 
 
 @pytest.mark.asyncio
-async def test_get_tokenizer_info_consistency_with_tokenize(server: RemoteOpenAIServer):
+async def test_get_tokenizer_info_consistency_with_tokenize(
+        server: RemoteOpenAIServer, ):
     """Test that tokenizer info is consistent with tokenization endpoint."""
     info_response = requests.get(server.url_for("get_tokenizer_info"))
     info_response.raise_for_status()
     info = info_response.json()
     tokenize_response = requests.post(
         server.url_for("tokenize"),
-        json={"model": MODEL_NAME, "prompt": "Hello world!"}
+        json={
+            "model": MODEL_NAME,
+            "prompt": "Hello world!"
+        },
     )
     tokenize_response.raise_for_status()
     tokenize_result = tokenize_response.json()
     info_max_len = info.get("model_max_length")
     tokenize_max_len = tokenize_result.get("max_model_len")
     if info_max_len and tokenize_max_len:
-        assert info_max_len >= tokenize_max_len, "Info max length should be >= tokenize max length"
+        assert info_max_len >= tokenize_max_len, (
+            "Info max length should be >= tokenize max length")
 
 
 @pytest.mark.asyncio
@@ -373,5 +392,6 @@ async def test_get_tokenizer_info_chat_template(server: RemoteOpenAIServer):
     result = response.json()
     chat_template = result.get("chat_template")
     if chat_template:
-        assert isinstance(chat_template, str), "Chat template should be a string"
-        assert chat_template.strip(), "Chat template should not be empty"
+        assert isinstance(chat_template,
+                          str), ("Chat template should be a string")
+        assert chat_template.strip(), "Chat template should not be empty"
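
Taken together, the schema, added-tokens, and consistency tests above pin down the rough shape of the tokenizer-info payload. The snippet below is only an illustrative sketch of such a response; every value is made up for the example, and real output depends on the served model:

# Illustrative only: a payload of the rough shape test_get_tokenizer_info_schema
# expects. Values are invented; real output depends on the model being served.
example_tokenizer_info = {
    "tokenizer_class": "LlamaTokenizerFast",   # always present, non-empty str
    "add_bos_token": True,                     # optional bool fields
    "add_prefix_space": True,
    "clean_up_tokenization_spaces": False,
    "split_special_tokens": False,
    "bos_token": "<s>",                        # optional str field
    "model_max_length": 4096,                  # compared against /tokenize's max_model_len
    "chat_template": "{% for message in messages %}...{% endfor %}",
    "added_tokens_decoder": {
        "32000": {"content": "vllm1", "special": False},  # keys are string token IDs
    },
}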

vllm/entrypoints/openai/api_server.py

Lines changed: 7 additions & 7 deletions
@@ -73,7 +73,6 @@
                                               ResponsesResponse, ScoreRequest,
                                               ScoreResponse, TokenizeRequest,
                                               TokenizeResponse,
-                                              TokenizerInfoResponse,
                                               TranscriptionRequest,
                                               TranscriptionResponse,
                                               TranslationRequest,
@@ -527,15 +526,16 @@ async def detokenize(request: DetokenizeRequest, raw_request: Request):
 def maybe_register_tokenizer_info_endpoint(args):
     """Conditionally register the tokenizer info endpoint if enabled."""
     if getattr(args, 'enable_tokenizer_info_endpoint', False):
-        @router.get("/get_tokenizer_info")
+
+        @router.get("/tokenizer_info")
         async def get_tokenizer_info(raw_request: Request):
             """Get comprehensive tokenizer information."""
             result = await tokenization(raw_request).get_tokenizer_info()
-            return JSONResponse(
-                content=result.model_dump(),
-                status_code=result.code if isinstance(result, ErrorResponse) else 200)
-
-
+            return JSONResponse(content=result.model_dump(),
+                                status_code=result.code if isinstance(
+                                    result, ErrorResponse) else 200)
+
+
 @router.get("/v1/models")
 async def show_available_models(raw_request: Request):
     handler = models(raw_request)
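
With the route now registered under /tokenizer_info, a client can fetch the same information directly. A minimal sketch, assuming a vLLM OpenAI-compatible server started with --enable-tokenizer-info-endpoint and reachable at localhost:8000 (the address and the printed fields are illustrative):

import requests

# Minimal client sketch for the renamed route. Assumes the server was started
# with --enable-tokenizer-info-endpoint; otherwise the route is not registered
# and this request returns 404.
resp = requests.get("http://localhost:8000/tokenizer_info")
resp.raise_for_status()
info = resp.json()
print(info["tokenizer_class"])       # e.g. the HF tokenizer class name
print(info.get("model_max_length"))  # should be >= /tokenize's max_model_len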

vllm/entrypoints/openai/cli_args.py

Lines changed: 2 additions & 3 deletions
@@ -299,9 +299,8 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         "--enable-tokenizer-info-endpoint",
         action='store_true',
         default=False,
-        help="Enable the /get_tokenizer_info endpoint. May expose chat "
-        "templates and other tokenizer configuration."
-    )
+        help="Enable the /tokenizer_info endpoint. May expose chat "
+        "templates and other tokenizer configuration.")
 
     return parser
 
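Since the endpoint stays opt-in (default=False), one quick way to check the CLI wiring is to build the parser and inspect the parsed namespace. A minimal sketch, assuming FlexibleArgumentParser is importable from vllm.utils (its usual location) and that the other server arguments all have defaults:

from vllm.entrypoints.openai.cli_args import make_arg_parser
from vllm.utils import FlexibleArgumentParser  # assumed import path

parser = make_arg_parser(FlexibleArgumentParser())
# Omitting the flag leaves the endpoint disabled (default=False)...
assert parser.parse_args([]).enable_tokenizer_info_endpoint is False
# ...and passing it flips the store_true action on.
args = parser.parse_args(["--enable-tokenizer-info-endpoint"])
assert args.enable_tokenizer_info_endpoint is True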