
Commit 92f6ee4

ncybul, Yun-Kim, and brettlangdon authored and committed
fix(litellm): [MLOS-182] select metadata keys to tag from litellm kwargs (#14067) [backport to 3.10] (#14083)
We received a customer ticket explaining that `vertex_credentials` was being collected as a metadata field on LLM spans emitted by the LiteLLM integration. This PR addresses that issue and safeguards against other potentially sensitive information being exposed by explicitly selecting a subset of kwargs to attach to the LLM span's metadata. Keys were chosen based on the arguments passed into the LiteLLM SDK [completion](https://github.com/BerriAI/litellm/blob/main/litellm/main.py#L874-L917) method and [text_completion](https://github.com/BerriAI/litellm/blob/main/litellm/main.py#L4446-L4497) method.

I verified that this PR resolves the issue. The following script was run to produce this [trace](https://app.datadoghq.com/llm/traces?query=%40ml_app%3Anicole-test%20%40event_type%3Aspan%20%40parent_id%3Aundefined&agg_m=count&agg_m_source=base&agg_t=count&fromUser=true&llmPanels=%5B%7B%22t%22%3A%22sampleDetailPanel%22%2C%22rEID%22%3A%22AwAAAZgeNigRWrccCwAAABhBWmdlTmlnUkFBQmM2eEc4M1pUNUFBQUEAAAAkMDE5ODFlMzYtNDNiMi00NGQ1LWJlMTUtNzk3MDUxZTNmNTBhAAAALQ%22%7D%5D&spanId=1740319997238873855&start=1752852637158&end=1752853537158&paused=false), where the metadata field does not contain `vertex_credentials`:

```python
from litellm import completion
import json

file_path = '/path/to/credentials'
with open(file_path, 'r') as file:
    vertex_credentials = json.load(file)
vertex_credentials_json = json.dumps(vertex_credentials)

response = completion(
    model="vertex_ai/gemini-1.5-flash",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    vertex_credentials=vertex_credentials_json,
    stream=True,
)

for chunk in response:
    print(chunk)
```

## Checklist

- [x] PR author has checked that all the criteria below are met
  - The PR description includes an overview of the change
  - The PR description articulates the motivation for the change
  - The change includes tests OR the PR description describes a testing strategy
  - The PR description notes risks associated with the change, if any
  - Newly-added code is easy to change
  - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
  - The change includes or references documentation updates if necessary
  - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist

- [x] Reviewer has checked that all the criteria below are met
  - Title is accurate
  - All changes are related to the pull request's stated goal
  - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
  - Testing strategy adequately addresses listed risks
  - Newly-added code is easy to change
  - Release note makes sense to a user of the library
  - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
  - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

(cherry picked from commit e31f11f)

Co-authored-by: Yun Kim <[email protected]>
Co-authored-by: Brett Langdon <[email protected]>
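To make the failure mode concrete: a denylist only strips keys it already knows about, so a new or provider-specific kwarg such as `vertex_credentials` passes straight through, while an allowlist drops anything it does not recognize. A minimal self-contained sketch (the `denylist` and `allowlist` contents below are illustrative, not the integration's real constants):

```python
kwargs = {"temperature": 0.2, "vertex_credentials": "<secret>"}

# Old behavior (denylist): keys that are not explicitly skipped leak through.
denylist = {"messages", "api_key"}
print({k: v for k, v in kwargs.items() if k not in denylist})
# {'temperature': 0.2, 'vertex_credentials': '<secret>'}

# New behavior (allowlist): anything not explicitly recognized is dropped.
allowlist = {"temperature", "max_tokens", "stream"}
print({k: v for k, v in kwargs.items() if k in allowlist})
# {'temperature': 0.2}
```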
1 parent 48961a1 · commit 92f6ee4

3 files changed (+78, -6 lines)


ddtrace/llmobs/_integrations/litellm.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -71,9 +71,9 @@ def _llmobs_set_tags(

         # use Open AI helpers since response format will match Open AI
         if self.is_completion_operation(operation):
-            openai_set_meta_tags_from_completion(span, kwargs, response)
+            openai_set_meta_tags_from_completion(span, kwargs, response, integration_name="litellm")
         else:
-            openai_set_meta_tags_from_chat(span, kwargs, response)
+            openai_set_meta_tags_from_chat(span, kwargs, response, integration_name="litellm")

         # custom logic for updating metadata on litellm spans
         self._update_litellm_metadata(span, kwargs, operation)
```
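Because `integration_name` defaults to `"openai"` in the shared helpers (see the `utils.py` diff below), existing OpenAI call sites keep their old code path; only the two LiteLLM call sites above opt into the allowlist. A minimal sketch of that default-argument dispatch pattern, using stand-in names rather than the real helpers:

```python
def set_meta_tags(kwargs, integration_name="openai"):
    # Callers that do not pass integration_name keep the original behavior.
    return "allowlist" if integration_name == "litellm" else "denylist"

assert set_meta_tags({}) == "denylist"                               # OpenAI, unchanged
assert set_meta_tags({}, integration_name="litellm") == "allowlist"  # LiteLLM opts in
```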

ddtrace/llmobs/_integrations/utils.py

Lines changed: 72 additions & 4 deletions
```diff
@@ -50,6 +50,57 @@
     LITELLM_ROUTER_INSTANCE_KEY,
 )

+LITELLM_METADATA_CHAT_KEYS = (
+    "timeout",
+    "temperature",
+    "top_p",
+    "n",
+    "stream",
+    "stream_options",
+    "stop",
+    "max_completion_tokens",
+    "max_tokens",
+    "modalities",
+    "prediction",
+    "presence_penalty",
+    "frequency_penalty",
+    "logit_bias",
+    "user",
+    "response_format",
+    "seed",
+    "tool_choice",
+    "parallel_tool_calls",
+    "logprobs",
+    "top_logprobs",
+    "deployment_id",
+    "reasoning_effort",
+    "base_url",
+    "api_base",
+    "api_version",
+    "model_list",
+)
+LITELLM_METADATA_COMPLETION_KEYS = (
+    "best_of",
+    "echo",
+    "frequency_penalty",
+    "logit_bias",
+    "logprobs",
+    "max_tokens",
+    "n",
+    "presence_penalty",
+    "stop",
+    "stream",
+    "stream_options",
+    "suffix",
+    "temperature",
+    "top_p",
+    "user",
+    "api_base",
+    "api_version",
+    "model_list",
+    "custom_llm_provider",
+)
+

 def extract_model_name_google(instance, model_name_attr):
     """Extract the model name from the instance.
```
```diff
@@ -299,12 +350,14 @@ def get_messages_from_converse_content(role: str, content: list):
     return messages


-def openai_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], completions: Any) -> None:
+def openai_set_meta_tags_from_completion(
+    span: Span, kwargs: Dict[str, Any], completions: Any, integration_name: str = "openai"
+) -> None:
     """Extract prompt/response tags from a completion and set them as temporary "_ml_obs.meta.*" tags."""
     prompt = kwargs.get("prompt", "")
     if isinstance(prompt, str):
         prompt = [prompt]
-    parameters = {k: v for k, v in kwargs.items() if k not in OPENAI_SKIPPED_COMPLETION_TAGS}
+    parameters = get_metadata_from_kwargs(kwargs, integration_name, "completion")
     output_messages = [{"content": ""}]
     if not span.error and completions:
         choices = getattr(completions, "choices", completions)
```
```diff
@@ -318,15 +371,17 @@ def openai_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], com
     )


-def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: Optional[Any]) -> None:
+def openai_set_meta_tags_from_chat(
+    span: Span, kwargs: Dict[str, Any], messages: Optional[Any], integration_name: str = "openai"
+) -> None:
     """Extract prompt/response tags from a chat completion and set them as temporary "_ml_obs.meta.*" tags."""
     input_messages = []
     for m in kwargs.get("messages", []):
         tool_call_id = m.get("tool_call_id")
         if tool_call_id:
             core.dispatch(DISPATCH_ON_TOOL_CALL_OUTPUT_USED, (tool_call_id, span))
         input_messages.append({"content": str(_get_attr(m, "content", "")), "role": str(_get_attr(m, "role", ""))})
-    parameters = {k: v for k, v in kwargs.items() if k not in OPENAI_SKIPPED_CHAT_TAGS}
+    parameters = get_metadata_from_kwargs(kwargs, integration_name, "chat")
     span._set_ctx_items({INPUT_MESSAGES: input_messages, METADATA: parameters})

     if span.error or not messages:
```
```diff
@@ -398,6 +453,19 @@ def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages:
     span._set_ctx_item(OUTPUT_MESSAGES, output_messages)


+def get_metadata_from_kwargs(
+    kwargs: Dict[str, Any], integration_name: str = "openai", operation: str = "chat"
+) -> Dict[str, Any]:
+    metadata = {}
+    if integration_name == "openai":
+        keys_to_skip = OPENAI_SKIPPED_CHAT_TAGS if operation == "chat" else OPENAI_SKIPPED_COMPLETION_TAGS
+        metadata = {k: v for k, v in kwargs.items() if k not in keys_to_skip}
+    elif integration_name == "litellm":
+        keys_to_include = LITELLM_METADATA_CHAT_KEYS if operation == "chat" else LITELLM_METADATA_COMPLETION_KEYS
+        metadata = {k: v for k, v in kwargs.items() if k in keys_to_include}
+    return metadata
+
+
 def openai_get_input_messages_from_response_input(
     messages: Optional[Union[str, List[Dict[str, Any]]]]
 ) -> List[Dict[str, Any]]:
```
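A hedged usage sketch of the new helper (the kwargs are illustrative; the LiteLLM result follows from the allowlist above, while the OpenAI branch's exact output depends on `OPENAI_SKIPPED_CHAT_TAGS`, which this diff does not show):

```python
from ddtrace.llmobs._integrations.utils import get_metadata_from_kwargs

kwargs = {"temperature": 0.1, "vertex_credentials": "<secret>"}

# LiteLLM branch: allowlist, so only keys named in LITELLM_METADATA_CHAT_KEYS survive.
assert get_metadata_from_kwargs(kwargs, "litellm", "chat") == {"temperature": 0.1}

# OpenAI branch: denylist, so any kwarg not listed in OPENAI_SKIPPED_CHAT_TAGS is kept,
# preserving that integration's pre-existing behavior.
print(get_metadata_from_kwargs(kwargs, "openai", "chat"))
```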
Lines changed: 4 additions & 0 deletions
```diff
@@ -0,0 +1,4 @@
+fixes:
+  - |
+    litellm: This fix resolves an issue where potentially sensitive parameters were being tagged as metadata on LLM Observability spans.
+    Now, metadata tags are based on an allowlist instead of a denylist.
```
