Commit bffbe4f

Support returning multi-modal content from tools
1 parent 1c009f3 commit bffbe4f

12 files changed: +974 −5 lines changed

docs/tools.md (+56 −3)

@@ -15,6 +15,8 @@ There are a number of ways to register tools with an agent:
 * via the [`@agent.tool_plain`][pydantic_ai.Agent.tool_plain] decorator — for tools that do not need access to the agent [context][pydantic_ai.tools.RunContext]
 * via the [`tools`][pydantic_ai.Agent.__init__] keyword argument to `Agent` which can take either plain functions, or instances of [`Tool`][pydantic_ai.tools.Tool]
 
+## Registering Function Tools via Decorator
+
 `@agent.tool` is considered the default decorator since in the majority of cases tools will need access to the agent context.
 
 Here's an example using both:
@@ -188,7 +190,7 @@ sequenceDiagram
     Note over Agent: Game session complete
 ```
 
-## Registering Function Tools via kwarg
+## Registering Function Tools via Agent Argument
 
 As well as using the decorators, we can register tools via the `tools` argument to the [`Agent` constructor][pydantic_ai.Agent.__init__]. This is useful when you want to reuse tools, and can also give more fine-grained control over the tools.
@@ -244,6 +246,59 @@ print(dice_result['b'].output)
 
 _(This example is complete, it can be run "as is")_
 
+## Function Tool Output
+
+Tools can return anything that Pydantic can serialize to JSON, as well as audio, video, image or document content depending on the types of [multi-modal input](input.md) the model supports:
+
+```python {title="function_tool_output.py"}
+from pydantic import BaseModel
+from pydantic_ai import Agent, ImageUrl, DocumentUrl
+from pydantic_ai.models.openai import OpenAIResponsesModel
+from datetime import datetime
+
+class User(BaseModel):
+    name: str
+    age: int
+
+agent = Agent(model=OpenAIResponsesModel('gpt-4o'))
+
+@agent.tool_plain
+def get_current_time() -> datetime:
+    return datetime.now()
+
+@agent.tool_plain
+def get_user() -> User:
+    return User(name='John', age=30)
+
+@agent.tool_plain
+def get_company_logo() -> ImageUrl:
+    return ImageUrl(url='https://iili.io/3Hs4FMg.png')
+
+@agent.tool_plain
+def get_document() -> DocumentUrl:
+    return DocumentUrl(url='https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf')
+
+result = agent.run_sync('What time is it?')
+print(result.output)
+#> The current time is 10:45 PM on April 17, 2025.
+
+result = agent.run_sync('What is the user name?')
+print(result.output)
+#> The user's name is John.
+
+result = agent.run_sync('What is the company name in the logo?')
+print(result.output)
+#> The company name in the logo is "Pydantic."
+
+result = agent.run_sync('What is the main content of the document?')
+print(result.output)
+#> The document contains just the text "Dummy PDF file."
+```
+
+Some models (e.g. Gemini) natively support semi-structured return values, while some expect text (OpenAI) but seem to be just as good at extracting meaning from the data. If a Python object is returned and the model expects a string, the value will be serialized to JSON.
+
+_(This example is complete, it can be run "as is")_
+
 ## Function Tools vs. Structured Outputs
 
 As the name suggests, function tools use the model's "tools" or "functions" API to let the model know what is available to call. Tools or functions are also used to define the schema(s) for structured responses, thus a model might have access to many tools, some of which call function tools while others end the run and produce a final output.
@@ -307,8 +362,6 @@ agent.run_sync('hello', model=FunctionModel(print_schema))
 
 _(This example is complete, it can be run "as is")_
 
-The return type of tool can be anything which Pydantic can serialize to JSON as some models (e.g. Gemini) support semi-structured return values, some expect text (OpenAI) but seem to be just as good at extracting meaning from the data. If a Python object is returned and the model expects a string, the value will be serialized to JSON.
-
 If a tool has a single parameter that can be represented as an object in JSON schema (e.g. dataclass, TypedDict, pydantic model), the schema for the tool is simplified to be just that object.
 
 Here's an example where we use [`TestModel.last_model_request_parameters`][pydantic_ai.models.test.TestModel.last_model_request_parameters] to inspect the tool schema that would be passed to the model.
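The paragraph added to docs/tools.md above states the fallback rule: if a Python object is returned and the model expects a string, the value is serialized to JSON before it reaches the model. Here is a minimal sketch of that rule in isolation, narrowed to pydantic models for simplicity (`tool_return_as_text` is an illustrative helper, not part of the pydantic-ai API; the library itself accepts anything Pydantic can serialize):

```python
from typing import Union

from pydantic import BaseModel


class User(BaseModel):
    name: str
    age: int


def tool_return_as_text(content: Union[str, BaseModel]) -> str:
    """Render a tool's return value the way a text-only model would receive it."""
    if isinstance(content, str):
        return content  # strings are passed through unchanged
    return content.model_dump_json()  # other objects are serialized to JSON


print(tool_return_as_text(User(name='John', age=30)))
#> {"name":"John","age":30}
print(tool_return_as_text('already text'))
#> already text
```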

pydantic_ai_slim/pydantic_ai/_agent_graph.py (+25 −2)

@@ -576,7 +576,7 @@ def build_run_context(ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT
     )
 
 
-async def process_function_tools(
+async def process_function_tools(  # noqa C901
     tool_calls: list[_messages.ToolCallPart],
     output_tool_name: str | None,
     output_tool_call_id: str | None,
@@ -662,6 +662,8 @@ async def process_function_tools(
     if not calls_to_run:
         return
 
+    user_parts: list[_messages.UserPromptPart] = []
+
     # Run all tool tasks in parallel
     results_by_index: dict[int, _messages.ModelRequestPart] = {}
     with ctx.deps.tracer.start_as_current_span(
@@ -675,14 +677,33 @@ async def process_function_tools(
             asyncio.create_task(tool.run(call, run_context, ctx.deps.tracer), name=call.tool_name)
             for tool, call in calls_to_run
         ]
+
+        file_index = 1
+
         pending = tasks
         while pending:
             done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
             for task in done:
                 index = tasks.index(task)
                 result = task.result()
                 yield _messages.FunctionToolResultEvent(result, tool_call_id=call_index_to_event_id[index])
-                if isinstance(result, (_messages.ToolReturnPart, _messages.RetryPromptPart)):
+
+                if isinstance(result, _messages.RetryPromptPart):
+                    results_by_index[index] = result
+                elif isinstance(result, _messages.ToolReturnPart):
+                    if result.is_multi_modal:
+                        user_parts.append(
+                            _messages.UserPromptPart(
+                                content=[f'This is file {file_index}:', result.content],
+                                timestamp=result.timestamp,
+                                part_kind='user-prompt',
+                            )
+                        )
+
+                        result.content = f'See file {file_index}.'
+
+                        file_index += 1
+
                     results_by_index[index] = result
                 else:
                     assert_never(result)
@@ -692,6 +713,8 @@ async def process_function_tools(
     for k in sorted(results_by_index):
         output_parts.append(results_by_index[k])
 
+    output_parts.extend(user_parts)
+
 
 async def _tool_from_mcp_server(
     tool_name: str,
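The new branch above is the heart of the commit: most chat APIs accept only text in the tool-result slot, so a multi-modal tool return is split in two parts. The `ToolReturnPart` keeps a textual placeholder (`'See file N.'`), while the actual content travels in a synthetic `UserPromptPart` (`'This is file N:'`) appended after all tool results via `output_parts.extend(user_parts)`. Here is a standalone sketch of that transformation, using the `pydantic_ai.messages` names visible in this diff (the tool name, URL, and call id are illustrative):

```python
from pydantic_ai.messages import ImageUrl, ToolReturnPart, UserPromptPart

result = ToolReturnPart(
    tool_name='get_company_logo',
    content=ImageUrl(url='https://iili.io/3Hs4FMg.png'),
    tool_call_id='call_1',
)

user_parts: list[UserPromptPart] = []
file_index = 1

if result.is_multi_modal:  # property added in messages.py below
    # the file itself is delivered in a follow-up user prompt part...
    user_parts.append(UserPromptPart(content=[f'This is file {file_index}:', result.content]))
    # ...while the tool result only carries a textual pointer to it
    result.content = f'See file {file_index}.'
    file_index += 1

print(result.content)
#> See file 1.
```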

pydantic_ai_slim/pydantic_ai/messages.py (+8 −0)

@@ -253,6 +253,9 @@ def format(self) -> str:
 
 UserContent: TypeAlias = 'str | ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent'
 
+# Ideally this would be a Union of types, but Python 3.9 requires it to be a string, and strings don't work with `isinstance`.
+MultiModalContentTypes = (ImageUrl, AudioUrl, DocumentUrl, VideoUrl, BinaryContent)
+
 
 def _document_format(media_type: str) -> DocumentFormat:
     if media_type == 'application/pdf':
@@ -357,6 +360,11 @@ class ToolReturnPart:
     part_kind: Literal['tool-return'] = 'tool-return'
     """Part type identifier, this is available on all parts as a discriminator."""
 
+    @property
+    def is_multi_modal(self) -> bool:
+        """Return `True` if the content is multi-modal content."""
+        return isinstance(self.content, MultiModalContentTypes)
+
     def model_response_str(self) -> str:
         """Return a string representation of the content for the model."""
         if isinstance(self.content, str):
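As the new comment notes, `UserContent` is a string type alias (required for Python 3.9 compatibility) and a string can't be passed to `isinstance`, which is why the parallel runtime tuple exists: a tuple of classes is exactly what `isinstance` accepts. A quick sketch of the check `is_multi_modal` performs, using the names added above (the URL is illustrative):

```python
from pydantic_ai.messages import ImageUrl, MultiModalContentTypes

# one isinstance call covers every multi-modal content type
assert isinstance(ImageUrl(url='https://iili.io/3Hs4FMg.png'), MultiModalContentTypes)
assert not isinstance('plain text', MultiModalContentTypes)
```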

tests/models/cassettes/test_anthropic/test_image_as_binary_content_input.yaml (+62)
Large diffs are not rendered by default.

tests/models/cassettes/test_anthropic/test_image_as_binary_content_tool_response.yaml (+153)
Large diffs are not rendered by default.

tests/models/cassettes/test_gemini/test_image_as_binary_content_tool_response.yaml (+150)
Large diffs are not rendered by default.

tests/models/cassettes/test_openai/test_image_as_binary_content_tool_response.yaml (+204)
Large diffs are not rendered by default.

tests/models/cassettes/test_openai_responses/test_image_as_binary_content_tool_response.yaml (+237)
Large diffs are not rendered by default.

tests/models/test_anthropic.py (+30 −0)

@@ -589,6 +589,36 @@ async def test_image_url_input_invalid_mime_type(allow_model_requests: None, ant
     )
 
 
+@pytest.mark.vcr()
+async def test_image_as_binary_content_tool_response(
+    allow_model_requests: None, anthropic_api_key: str, image_content: BinaryContent
+):
+    m = AnthropicModel('claude-3-5-sonnet-latest', provider=AnthropicProvider(api_key=anthropic_api_key))
+    agent = Agent(m)
+
+    @agent.tool_plain
+    async def get_image() -> BinaryContent:
+        return image_content
+
+    result = await agent.run(['What fruit is in the image you have access to via the get_image tool?'])
+    assert result.output == snapshot(
+        "The image shows a kiwi fruit that has been cut in half, displaying its characteristic bright green flesh with small black seeds arranged in a circular pattern around a white center core. The kiwi's fuzzy brown skin is visible around the edges of the slice."
+    )
+
+
+@pytest.mark.vcr()
+async def test_image_as_binary_content_input(
+    allow_model_requests: None, anthropic_api_key: str, image_content: BinaryContent
+):
+    m = AnthropicModel('claude-3-5-sonnet-latest', provider=AnthropicProvider(api_key=anthropic_api_key))
+    agent = Agent(m)
+
+    result = await agent.run(['What is the name of this fruit?', image_content])
+    assert result.output == snapshot(
+        "This is a kiwi fruit (or simply kiwi). It's a slice showing the characteristic bright green flesh with tiny black seeds arranged in a circular pattern around a white center core. The fruit has a distinctive appearance with its fuzzy brown exterior (though only the inner flesh is shown in this cross-section image)."
+    )
+
+
 @pytest.mark.parametrize('media_type', ('audio/wav', 'audio/mpeg'))
 async def test_audio_as_binary_content_input(allow_model_requests: None, media_type: str):
     c = completion_message([TextBlock(text='world', type='text')], AnthropicUsage(input_tokens=5, output_tokens=10))

tests/models/test_gemini.py (+19 −0)

@@ -959,6 +959,25 @@ def handler(request: httpx.Request) -> httpx.Response:
     assert result.output == 'world'
 
 
+@pytest.mark.vcr()
+async def test_image_as_binary_content_tool_response(
+    allow_model_requests: None, gemini_api_key: str, image_content: BinaryContent
+) -> None:
+    m = GeminiModel('gemini-2.5-pro-preview-03-25', provider=GoogleGLAProvider(api_key=gemini_api_key))
+    agent = Agent(m)
+
+    @agent.tool_plain
+    async def get_image() -> BinaryContent:
+        return image_content
+
+    result = await agent.run(['What fruit is in the image you have access to via the get_image tool?'])
+    assert result.output == snapshot("""\
+Okay, I have retrieved the image.
+
+The fruit in the image is a kiwi, sliced in half.\
+""")
+
+
 @pytest.mark.vcr()
 async def test_image_as_binary_content_input(
     allow_model_requests: None, gemini_api_key: str, image_content: BinaryContent

tests/models/test_openai.py (+15 −0)

@@ -640,6 +640,21 @@ async def test_image_url_input(allow_model_requests: None):
     )
 
 
+@pytest.mark.vcr()
+async def test_image_as_binary_content_tool_response(
+    allow_model_requests: None, image_content: BinaryContent, openai_api_key: str
+):
+    m = OpenAIModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key))
+    agent = Agent(m)
+
+    @agent.tool_plain
+    async def get_image() -> BinaryContent:
+        return image_content
+
+    result = await agent.run(['What fruit is in the image you have access to via the get_image tool?'])
+    assert result.output == snapshot('The fruit in the image is a kiwi.')
+
+
 @pytest.mark.vcr()
 async def test_image_as_binary_content_input(
     allow_model_requests: None, image_content: BinaryContent, openai_api_key: str

tests/models/test_openai_responses.py (+15 −0)

@@ -221,6 +221,21 @@ async def get_location(loc_name: str) -> str:
     )
 
 
+@pytest.mark.vcr()
+async def test_image_as_binary_content_tool_response(
+    allow_model_requests: None, image_content: BinaryContent, openai_api_key: str
+):
+    m = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key))
+    agent = Agent(m)
+
+    @agent.tool_plain
+    async def get_image() -> BinaryContent:
+        return image_content
+
+    result = await agent.run(['What fruit is in the image you have access to via the get_image tool?'])
+    assert result.output == snapshot('The fruit in the image is a kiwi.')
+
+
 async def test_image_as_binary_content_input(
     allow_model_requests: None, image_content: BinaryContent, openai_api_key: str
 ):
