elastic · jonathan-buttner · Jun 4, 2025 · May 27, 2025 · May 27, 2025 · May 27, 2025
diff --git a/docs/changelog/128538.yaml b/docs/changelog/128538.yaml
@@ -0,0 +1,5 @@
+pr: 128538
+summary: "Added Mistral Chat Completion support to the Inference Plugin"
+area: Machine Learning
+type: enhancement
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java
@@ -182,6 +182,7 @@ static TransportVersion def(int id) {
     public static final TransportVersion ML_INFERENCE_HUGGING_FACE_RERANK_ADDED_8_19 = def(8_841_0_36);
     public static final TransportVersion ML_INFERENCE_SAGEMAKER_CHAT_COMPLETION_8_19 = def(8_841_0_37);
     public static final TransportVersion ML_INFERENCE_VERTEXAI_CHATCOMPLETION_ADDED_8_19 = def(8_841_0_38);
+    public static final TransportVersion ML_INFERENCE_MISTRAL_CHAT_COMPLETION_ADDED_8_19 = def(8_841_0_39);
     public static final TransportVersion V_9_0_0 = def(9_000_0_09);
     public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10);
     public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11);
@@ -268,7 +269,7 @@ static TransportVersion def(int id) {
     public static final TransportVersion SETTINGS_IN_DATA_STREAMS_DRY_RUN = def(9_081_0_00);
     public static final TransportVersion ML_INFERENCE_SAGEMAKER_CHAT_COMPLETION = def(9_082_0_00);
     public static final TransportVersion ML_INFERENCE_VERTEXAI_CHATCOMPLETION_ADDED = def(9_083_0_00);
-
+    public static final TransportVersion ML_INFERENCE_MISTRAL_CHAT_COMPLETION_ADDED = def(9_084_0_00);
     /*
      * STOP! READ THIS FIRST! No, really,
      *        ____ _____ ___  ____  _        ____  _____    _    ____    _____ _   _ ___ ____    _____ ___ ____  ____ _____ _

diff --git a/...tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java b/...tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java
@@ -124,7 +124,7 @@ public void testGetServicesWithRerankTaskType() throws IOException {
 
     public void testGetServicesWithCompletionTaskType() throws IOException {
         List<Object> services = getServices(TaskType.COMPLETION);
-        assertThat(services.size(), equalTo(12));
+        assertThat(services.size(), equalTo(13));
 
         var providers = providers(services);
 
@@ -143,15 +143,16 @@ public void testGetServicesWithCompletionTaskType() throws IOException {
                     "openai",
                     "streaming_completion_test_service",
                     "hugging_face",
-                    "amazon_sagemaker"
+                    "amazon_sagemaker",
+                    "mistral"
                 ).toArray()
             )
         );
     }
 
     public void testGetServicesWithChatCompletionTaskType() throws IOException {
         List<Object> services = getServices(TaskType.CHAT_COMPLETION);
-        assertThat(services.size(), equalTo(7));
+        assertThat(services.size(), equalTo(8));
 
         var providers = providers(services);
 
@@ -165,7 +166,8 @@ public void testGetServicesWithChatCompletionTaskType() throws IOException {
                     "streaming_completion_test_service",
                     "hugging_face",
                     "amazon_sagemaker",
-                    "googlevertexai"
+                    "googlevertexai",
+                    "mistral"
                 ).toArray()
             )
         );

diff --git a/...nce/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java b/...nce/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java
@@ -91,6 +91,7 @@
 import org.elasticsearch.xpack.inference.services.jinaai.embeddings.JinaAIEmbeddingsTaskSettings;
 import org.elasticsearch.xpack.inference.services.jinaai.rerank.JinaAIRerankServiceSettings;
 import org.elasticsearch.xpack.inference.services.jinaai.rerank.JinaAIRerankTaskSettings;
+import org.elasticsearch.xpack.inference.services.mistral.completion.MistralChatCompletionServiceSettings;
 import org.elasticsearch.xpack.inference.services.mistral.embeddings.MistralEmbeddingsServiceSettings;
 import org.elasticsearch.xpack.inference.services.openai.completion.OpenAiChatCompletionServiceSettings;
 import org.elasticsearch.xpack.inference.services.openai.completion.OpenAiChatCompletionTaskSettings;
@@ -218,6 +219,13 @@ private static void addMistralNamedWriteables(List<NamedWriteableRegistry.Entry>
                 MistralEmbeddingsServiceSettings::new
             )
         );
+        namedWriteables.add(
+            new NamedWriteableRegistry.Entry(
+                ServiceSettings.class,
+                MistralChatCompletionServiceSettings.NAME,
+                MistralChatCompletionServiceSettings::new
+            )
+        );
 
         // note - no task settings for Mistral embeddings...
     }

diff --git a/...org/elasticsearch/xpack/inference/external/response/streaming/StreamingErrorResponse.java b/...org/elasticsearch/xpack/inference/external/response/streaming/StreamingErrorResponse.java
@@ -0,0 +1,110 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.external.response.streaming;
+
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.xcontent.ConstructingObjectParser;
+import org.elasticsearch.xcontent.ParseField;
+import org.elasticsearch.xcontent.XContentFactory;
+import org.elasticsearch.xcontent.XContentParser;
+import org.elasticsearch.xcontent.XContentParserConfiguration;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xpack.inference.external.http.HttpResult;
+import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse;
+
+import java.util.Objects;
+import java.util.Optional;
+
+public class StreamingErrorResponse extends ErrorResponse {
+    private static final ConstructingObjectParser<Optional<ErrorResponse>, Void> ERROR_PARSER = new ConstructingObjectParser<>(
+        "streaming_error",
+        true,
+        args -> Optional.ofNullable((StreamingErrorResponse) args[0])
+    );
+    private static final ConstructingObjectParser<StreamingErrorResponse, Void> ERROR_BODY_PARSER = new ConstructingObjectParser<>(
+        "streaming_error",
+        true,
+        args -> new StreamingErrorResponse((String) args[0], (String) args[1], (String) args[2], (String) args[3])
+    );
+
+    static {
+        ERROR_BODY_PARSER.declareString(ConstructingObjectParser.constructorArg(), new ParseField("message"));
+        ERROR_BODY_PARSER.declareStringOrNull(ConstructingObjectParser.optionalConstructorArg(), new ParseField("code"));
+        ERROR_BODY_PARSER.declareStringOrNull(ConstructingObjectParser.optionalConstructorArg(), new ParseField("param"));
+        ERROR_BODY_PARSER.declareString(ConstructingObjectParser.constructorArg(), new ParseField("type"));
+
+        ERROR_PARSER.declareObjectOrNull(
+            ConstructingObjectParser.optionalConstructorArg(),
+            ERROR_BODY_PARSER,
+            null,
+            new ParseField("error")
+        );
+    }
+
+    /**
+     * Parses the JSON body of {@code response}, looking for a top-level {@code error} object. Returns
+     * {@link ErrorResponse#UNDEFINED_ERROR} when that object is absent/null or when parsing throws.
+     * @param response The error response as an HttpResult
+     */
+    public static ErrorResponse fromResponse(HttpResult response) {
+        try (
+            XContentParser parser = XContentFactory.xContent(XContentType.JSON)
+                .createParser(XContentParserConfiguration.EMPTY, response.body())
+        ) {
+            return ERROR_PARSER.apply(parser, null).orElse(ErrorResponse.UNDEFINED_ERROR);
+        } catch (Exception e) {
+            // deliberately ignored: an unparseable body falls through to UNDEFINED_ERROR below
+        }
+
+        return ErrorResponse.UNDEFINED_ERROR;
+    }
+
+    /**
+     * Same as {@link #fromResponse(HttpResult)} but reads the JSON from a string. Returns
+     * {@link ErrorResponse#UNDEFINED_ERROR} when there is no {@code error} object or when parsing throws.
+     * @param response The error response as a string
+     */
+    public static ErrorResponse fromString(String response) {
+        try (
+            XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(XContentParserConfiguration.EMPTY, response)
+        ) {
+            return ERROR_PARSER.apply(parser, null).orElse(ErrorResponse.UNDEFINED_ERROR);
+        } catch (Exception e) {
+            // deliberately ignored: an unparseable string falls through to UNDEFINED_ERROR below
+        }
+
+        return ErrorResponse.UNDEFINED_ERROR;
+    }
+
+    @Nullable
+    private final String code;
+    @Nullable
+    private final String param;
+    private final String type;
+
+    StreamingErrorResponse(String errorMessage, @Nullable String code, @Nullable String param, String type) {
+        super(errorMessage);
+        this.code = code;
+        this.param = param;
+        this.type = Objects.requireNonNull(type);
+    }
+
+    @Nullable
+    public String code() {
+        return code;
+    }
+
+    @Nullable
+    public String param() {
+        return param;
+    }
+
+    public String type() {
+        return type;
+    }
+}
diff --git a/...rg/elasticsearch/xpack/inference/external/unified/UnifiedChatCompletionRequestEntity.java b/...rg/elasticsearch/xpack/inference/external/unified/UnifiedChatCompletionRequestEntity.java
@@ -15,6 +15,10 @@
 import java.io.IOException;
 import java.util.Objects;
 
+/**
+ * Represents a unified chat completion request entity.
+ * This class is used to convert the unified chat input into a format that can be serialized to XContent.
+ */
 public class UnifiedChatCompletionRequestEntity implements ToXContentFragment {
 
     public static final String STREAM_FIELD = "stream";
@@ -43,11 +47,19 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
 
         builder.field(STREAM_FIELD, stream);
         if (stream) {
-            builder.startObject(STREAM_OPTIONS_FIELD);
-            builder.field(INCLUDE_USAGE_FIELD, true);
-            builder.endObject();
+            fillStreamOptionsFields(builder);
         }
 
         return builder;
     }
+
+    /**
+     * Writes an object named {@link #STREAM_OPTIONS_FIELD} containing {@link #INCLUDE_USAGE_FIELD} set to {@code true}.
+     * Invoked from {@code toXContent} only when {@code stream} is true; it is protected, so subclasses may override it.
+     */
+    protected void fillStreamOptionsFields(XContentBuilder builder) throws IOException {
+        builder.startObject(STREAM_OPTIONS_FIELD);
+        builder.field(INCLUDE_USAGE_FIELD, true);
+        builder.endObject();
+    }
 }
diff --git a/.../org/elasticsearch/xpack/inference/services/mistral/MistralCompletionResponseHandler.java b/.../org/elasticsearch/xpack/inference/services/mistral/MistralCompletionResponseHandler.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mistral;
+
+import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser;
+import org.elasticsearch.xpack.inference.services.mistral.response.MistralErrorResponse;
+import org.elasticsearch.xpack.inference.services.openai.OpenAiChatCompletionResponseHandler;
+
+/**
+ * Handles non-streaming completion responses for Mistral models, extending the OpenAI completion response handler.
+ * The only difference from the parent visible here is that error bodies are parsed with {@code MistralErrorResponse}.
+ */
+public class MistralCompletionResponseHandler extends OpenAiChatCompletionResponseHandler {
+
+    /**
+     * Delegates to the OpenAI handler, passing {@code MistralErrorResponse::fromResponse} as the error parser.
+     *
+     * @param requestType The type of request being handled (e.g., "mistral completions").
+     * @param parseFunction The function to parse the response.
+     */
+    public MistralCompletionResponseHandler(String requestType, ResponseParser parseFunction) {
+        super(requestType, parseFunction, MistralErrorResponse::fromResponse);
+    }
+}
diff --git a/...ce/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralConstants.java b/...ce/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralConstants.java
@@ -9,6 +9,7 @@
 
 public class MistralConstants {
     public static final String API_EMBEDDINGS_PATH = "https://api.mistral.ai/v1/embeddings";
+    public static final String API_COMPLETIONS_PATH = "https://api.mistral.ai/v1/chat/completions";
 
     // note - there is no bounds information available from Mistral,
     // so we'll use a sane default here which is the same as Cohere's
@@ -18,4 +19,8 @@ public class MistralConstants {
     public static final String MODEL_FIELD = "model";
     public static final String INPUT_FIELD = "input";
     public static final String ENCODING_FORMAT_FIELD = "encoding_format";
+    public static final String MAX_TOKENS_FIELD = "max_tokens";
+    public static final String DETAIL_FIELD = "detail";
+    public static final String MSG_FIELD = "msg";
+    public static final String MESSAGE_FIELD = "message";
 }
diff --git a/...a/org/elasticsearch/xpack/inference/services/mistral/MistralEmbeddingsRequestManager.java b/...a/org/elasticsearch/xpack/inference/services/mistral/MistralEmbeddingsRequestManager.java
@@ -22,7 +22,7 @@
 import org.elasticsearch.xpack.inference.external.response.ErrorMessageResponseEntity;
 import org.elasticsearch.xpack.inference.services.azureopenai.response.AzureMistralOpenAiExternalResponseHandler;
 import org.elasticsearch.xpack.inference.services.mistral.embeddings.MistralEmbeddingsModel;
-import org.elasticsearch.xpack.inference.services.mistral.request.MistralEmbeddingsRequest;
+import org.elasticsearch.xpack.inference.services.mistral.request.embeddings.MistralEmbeddingsRequest;
 import org.elasticsearch.xpack.inference.services.mistral.response.MistralEmbeddingsResponseEntity;
 
 import java.util.List;

diff --git a/...erence/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralModel.java b/...erence/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralModel.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mistral;
+
+import org.elasticsearch.inference.ModelConfigurations;
+import org.elasticsearch.inference.ModelSecrets;
+import org.elasticsearch.inference.ServiceSettings;
+import org.elasticsearch.xpack.inference.services.RateLimitGroupingModel;
+import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings;
+import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+
+/**
+ * Abstract base for Mistral models; exposes the protected {@code model}, {@code uri} and {@code rateLimitSettings} fields.
+ * NOTE(review): {@link #rateLimitGroupingHash()} returns the constant 0, so no per-model/API-key grouping happens here.
+ */
+public abstract class MistralModel extends RateLimitGroupingModel {
+    protected String model;
+    protected URI uri;
+    protected RateLimitSettings rateLimitSettings;
+
+    protected MistralModel(ModelConfigurations configurations, ModelSecrets secrets) {
+        super(configurations, secrets);
+    }
+
+    protected MistralModel(RateLimitGroupingModel model, ServiceSettings serviceSettings) {
+        super(model, serviceSettings);
+    }
+
+    public String model() {
+        return this.model;
+    }
+
+    public URI uri() {
+        return this.uri;
+    }
+
+    @Override
+    public RateLimitSettings rateLimitSettings() {
+        return this.rateLimitSettings;
+    }
+
+    @Override
+    public int rateLimitGroupingHash() {
+        return 0;
+    }
+
+    // Needed for testing only: replaces the endpoint URI after construction
+    public void setURI(String newUri) {
+        try {
+            this.uri = new URI(newUri);
+        } catch (URISyntaxException e) {
+            // swallow any error: an invalid newUri leaves the current uri unchanged
+        }
+    }
+
+    @Override
+    public DefaultSecretSettings getSecretSettings() {
+        return (DefaultSecretSettings) super.getSecretSettings();
+    }
+}