From 4bc00a76ef7422d29d5c47447b96485db78b377b Mon Sep 17 00:00:00 2001 From: Rory Malcolm Date: Sun, 27 Apr 2025 23:01:34 +0100 Subject: [PATCH 1/2] feat: incident.io Notifier - Adds the technical implementation, and tests, for the incident.io notifier - Configured through the following config: ```yaml receivers: - name: 'incidentio-notifications' incidentio_configs: - url: '$alert_source_url' alert_source_token: '$alert_source_token' ``` Signed-off-by: Rory Malcolm --- config/config.go | 1 + config/notifiers.go | 50 +++++ config/receiver/receiver.go | 4 + notify/incidentio/incidentio.go | 204 ++++++++++++++++++ notify/incidentio/incidentio_test.go | 298 +++++++++++++++++++++++++++ notify/notify.go | 1 + 6 files changed, 558 insertions(+) create mode 100644 notify/incidentio/incidentio.go create mode 100644 notify/incidentio/incidentio_test.go diff --git a/config/config.go b/config/config.go index 82885b233a..55aa05cf76 100644 --- a/config/config.go +++ b/config/config.go @@ -1007,6 +1007,7 @@ type Receiver struct { DiscordConfigs []*DiscordConfig `yaml:"discord_configs,omitempty" json:"discord_configs,omitempty"` EmailConfigs []*EmailConfig `yaml:"email_configs,omitempty" json:"email_configs,omitempty"` + IncidentioConfigs []*IncidentioConfig `yaml:"incidentio_configs,omitempty" json:"incidentio_configs,omitempty"` PagerdutyConfigs []*PagerdutyConfig `yaml:"pagerduty_configs,omitempty" json:"pagerduty_configs,omitempty"` SlackConfigs []*SlackConfig `yaml:"slack_configs,omitempty" json:"slack_configs,omitempty"` WebhookConfigs []*WebhookConfig `yaml:"webhook_configs,omitempty" json:"webhook_configs,omitempty"` diff --git a/config/notifiers.go b/config/notifiers.go index 87f806aa27..0a94a0ae7c 100644 --- a/config/notifiers.go +++ b/config/notifiers.go @@ -28,6 +28,13 @@ import ( ) var ( + // DefaultIncidentioConfig defines default values for Incident.io configurations. 
+ DefaultIncidentioConfig = IncidentioConfig{ + NotifierConfig: NotifierConfig{ + VSendResolved: true, + }, + } + // DefaultWebhookConfig defines default values for Webhook configurations. DefaultWebhookConfig = WebhookConfig{ NotifierConfig: NotifierConfig{ @@ -521,6 +528,49 @@ func (c *SlackConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { return nil } +// IncidentioConfig configures notifications via incident.io. +type IncidentioConfig struct { + NotifierConfig `yaml:",inline" json:",inline"` + + HTTPConfig *commoncfg.HTTPClientConfig `yaml:"http_config,omitempty" json:"http_config,omitempty"` + + // URL to send POST request to. + URL *SecretURL `yaml:"url" json:"url"` + URLFile string `yaml:"url_file" json:"url_file"` + + // AlertSourceToken is the key used to authenticate with the alert source in incident.io. + AlertSourceToken Secret `yaml:"alert_source_token,omitempty" json:"alert_source_token,omitempty"` + AlertSourceTokenFile string `yaml:"alert_source_token_file,omitempty" json:"alert_source_token_file,omitempty"` + + // MaxAlerts is the maximum number of alerts to be sent per incident.io message. + // Alerts exceeding this threshold will be truncated. Setting this to 0 + // allows an unlimited number of alerts. + MaxAlerts uint64 `yaml:"max_alerts" json:"max_alerts"` + + // Timeout is the maximum time allowed to invoke incident.io. Setting this to 0 + // does not impose a timeout. + Timeout time.Duration `yaml:"timeout" json:"timeout"` +} + +// UnmarshalYAML implements the yaml.Unmarshaler interface. 
+func (c *IncidentioConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { + *c = DefaultIncidentioConfig + type plain IncidentioConfig + if err := unmarshal((*plain)(c)); err != nil { + return err + } + if c.URL == nil && c.URLFile == "" { + return errors.New("one of url or url_file must be configured") + } + if c.URL != nil && c.URLFile != "" { + return errors.New("at most one of url & url_file must be configured") + } + if c.AlertSourceToken != "" && c.AlertSourceTokenFile != "" { + return errors.New("at most one of alert_source_token & alert_source_token_file must be configured") + } + return nil +} + // WebhookConfig configures notifications via a generic webhook. type WebhookConfig struct { NotifierConfig `yaml:",inline" json:",inline"` diff --git a/config/receiver/receiver.go b/config/receiver/receiver.go index d92a19a4c5..23be2b11c8 100644 --- a/config/receiver/receiver.go +++ b/config/receiver/receiver.go @@ -23,6 +23,7 @@ import ( "github.com/prometheus/alertmanager/notify" "github.com/prometheus/alertmanager/notify/discord" "github.com/prometheus/alertmanager/notify/email" + "github.com/prometheus/alertmanager/notify/incidentio" "github.com/prometheus/alertmanager/notify/jira" "github.com/prometheus/alertmanager/notify/msteams" "github.com/prometheus/alertmanager/notify/msteamsv2" @@ -106,6 +107,9 @@ func BuildReceiverIntegrations(nc config.Receiver, tmpl *template.Template, logg for i, c := range nc.JiraConfigs { add("jira", i, c, func(l *slog.Logger) (notify.Notifier, error) { return jira.New(c, tmpl, l, httpOpts...) }) } + for i, c := range nc.IncidentioConfigs { + add("incidentio", i, c, func(l *slog.Logger) (notify.Notifier, error) { return incidentio.New(c, tmpl, l, httpOpts...) }) + } for i, c := range nc.RocketchatConfigs { add("rocketchat", i, c, func(l *slog.Logger) (notify.Notifier, error) { return rocketchat.New(c, tmpl, l, httpOpts...) 
}) } diff --git a/notify/incidentio/incidentio.go b/notify/incidentio/incidentio.go new file mode 100644 index 0000000000..39f1db6cc0 --- /dev/null +++ b/notify/incidentio/incidentio.go @@ -0,0 +1,204 @@ +// Copyright 2025 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package incidentio + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "os" + "strings" + + commoncfg "github.com/prometheus/common/config" + + "github.com/prometheus/alertmanager/config" + "github.com/prometheus/alertmanager/notify" + "github.com/prometheus/alertmanager/template" + "github.com/prometheus/alertmanager/types" +) + +// Notifier implements a Notifier for incident.io. +type Notifier struct { + conf *config.IncidentioConfig + tmpl *template.Template + logger *slog.Logger + client *http.Client + retrier *notify.Retrier +} + +// New returns a new incident.io notifier. 
+func New(conf *config.IncidentioConfig, t *template.Template, l *slog.Logger, httpOpts ...commoncfg.HTTPClientOption) (*Notifier, error) { + // If alert source token is specified, set authorization in HTTP config + if conf.HTTPConfig == nil { + conf.HTTPConfig = &commoncfg.HTTPClientConfig{} + } + + if conf.AlertSourceToken != "" { + if conf.HTTPConfig.Authorization == nil { + conf.HTTPConfig.Authorization = &commoncfg.Authorization{ + Type: "Bearer", + Credentials: commoncfg.Secret(conf.AlertSourceToken), + } + } + } else if conf.AlertSourceTokenFile != "" { + content, err := os.ReadFile(conf.AlertSourceTokenFile) + if err != nil { + return nil, fmt.Errorf("failed to read alert_source_token_file: %w", err) + } + + if conf.HTTPConfig.Authorization == nil { + conf.HTTPConfig.Authorization = &commoncfg.Authorization{ + Type: "Bearer", + Credentials: commoncfg.Secret(strings.TrimSpace(string(content))), + } + } + } + + client, err := commoncfg.NewClientFromConfig(*conf.HTTPConfig, "incidentio", httpOpts...) + if err != nil { + return nil, err + } + + return &Notifier{ + conf: conf, + tmpl: t, + logger: l, + client: client, + // Always retry on 429 (rate limiting) and 5xx response codes. + retrier: &notify.Retrier{ + RetryCodes: []int{ + http.StatusTooManyRequests, // 429 + http.StatusInternalServerError, + http.StatusBadGateway, + http.StatusServiceUnavailable, + http.StatusGatewayTimeout, + }, + CustomDetailsFunc: errDetails, + }, + }, nil +} + +// Message defines the JSON object sent to incident.io endpoints. +type Message struct { + *template.Data + + // The protocol version.
+ Version string `json:"version"` + GroupKey string `json:"groupKey"` + TruncatedAlerts uint64 `json:"truncatedAlerts"` +} + +func truncateAlerts(maxAlerts uint64, alerts []*types.Alert) ([]*types.Alert, uint64) { + if maxAlerts != 0 && uint64(len(alerts)) > maxAlerts { + return alerts[:maxAlerts], uint64(len(alerts)) - maxAlerts + } + + return alerts, 0 +} + +// Notify implements the Notifier interface. +func (n *Notifier) Notify(ctx context.Context, alerts ...*types.Alert) (bool, error) { + alerts, numTruncated := truncateAlerts(n.conf.MaxAlerts, alerts) + data := notify.GetTemplateData(ctx, n.tmpl, alerts, n.logger) + + groupKey, err := notify.ExtractGroupKey(ctx) + if err != nil { + return false, err + } + + n.logger.Debug("incident.io notification", "groupKey", groupKey) + + msg := &Message{ + Version: "4", + Data: data, + GroupKey: groupKey.String(), + TruncatedAlerts: numTruncated, + } + + var buf bytes.Buffer + if err := json.NewEncoder(&buf).Encode(msg); err != nil { + return false, err + } + + var url string + if n.conf.URL != nil { + url = n.conf.URL.String() + } else { + content, err := os.ReadFile(n.conf.URLFile) + if err != nil { + return false, fmt.Errorf("read url_file: %w", err) + } + url = strings.TrimSpace(string(content)) + } + + if n.conf.Timeout > 0 { + postCtx, cancel := context.WithTimeoutCause(ctx, n.conf.Timeout, fmt.Errorf("configured incident.io timeout reached (%s)", n.conf.Timeout)) + defer cancel() + ctx = postCtx + } + + resp, err := notify.PostJSON(ctx, n.client, url, &buf) + if err != nil { + if ctx.Err() != nil { + err = fmt.Errorf("%w: %w", err, context.Cause(ctx)) + } + return true, notify.RedactURL(err) + } + defer notify.Drain(resp) + + shouldRetry, err := n.retrier.Check(resp.StatusCode, resp.Body) + if err != nil { + return shouldRetry, notify.NewErrorWithReason(notify.GetFailureReasonFromStatusCode(resp.StatusCode), err) + } + return shouldRetry, err +} + +// errDetails extracts error details from the response for better 
error messages. +func errDetails(status int, body io.Reader) string { + if body == nil { + return "" + } + + // Try to decode the error message from JSON response + var errorResponse struct { + Message string `json:"message"` + Errors []string `json:"errors"` + Error string `json:"error"` + } + + if err := json.NewDecoder(body).Decode(&errorResponse); err != nil { + return "" + } + + // Format the error message + var parts []string + if errorResponse.Message != "" { + parts = append(parts, errorResponse.Message) + } + if errorResponse.Error != "" { + parts = append(parts, errorResponse.Error) + } + if len(errorResponse.Errors) > 0 { + parts = append(parts, strings.Join(errorResponse.Errors, ", ")) + } + + if len(parts) > 0 { + return strings.Join(parts, ": ") + } + return "" +} diff --git a/notify/incidentio/incidentio_test.go b/notify/incidentio/incidentio_test.go new file mode 100644 index 0000000000..5456221322 --- /dev/null +++ b/notify/incidentio/incidentio_test.go @@ -0,0 +1,298 @@ +// Copyright 2025 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package incidentio + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "net/url" + "os" + "testing" + "time" + + commoncfg "github.com/prometheus/common/config" + "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" + "github.com/stretchr/testify/require" + + "github.com/prometheus/alertmanager/config" + "github.com/prometheus/alertmanager/notify" + "github.com/prometheus/alertmanager/notify/test" + "github.com/prometheus/alertmanager/types" +) + +func TestIncidentIORetry(t *testing.T) { + notifier, err := New( + &config.IncidentioConfig{ + URL: &config.SecretURL{URL: &url.URL{Scheme: "https", Host: "example.com"}}, + HTTPConfig: &commoncfg.HTTPClientConfig{}, + }, + test.CreateTmpl(t), + promslog.NewNopLogger(), + ) + require.NoError(t, err) + + retryCodes := append(test.DefaultRetryCodes(), http.StatusTooManyRequests) + for statusCode, expected := range test.RetryTests(retryCodes) { + actual, _ := notifier.retrier.Check(statusCode, nil) + require.Equal(t, expected, actual, "retry - error on status %d", statusCode) + } +} + +func TestIncidentIORedactedURL(t *testing.T) { + ctx, u, fn := test.GetContextWithCancelingURL() + defer fn() + + notifier, err := New( + &config.IncidentioConfig{ + URL: &config.SecretURL{URL: u}, + HTTPConfig: &commoncfg.HTTPClientConfig{}, + }, + test.CreateTmpl(t), + promslog.NewNopLogger(), + ) + require.NoError(t, err) + + test.AssertNotifyLeaksNoSecret(ctx, t, notifier, u.String()) +} + +func TestIncidentIOURLFromFile(t *testing.T) { + ctx, u, fn := test.GetContextWithCancelingURL() + defer fn() + + f, err := os.CreateTemp("", "incidentio_test") + require.NoError(t, err, "creating temp file failed") + _, err = f.WriteString(u.String() + "\n") + require.NoError(t, err, "writing to temp file failed") + + notifier, err := New( + &config.IncidentioConfig{ + URLFile: f.Name(), + HTTPConfig: &commoncfg.HTTPClientConfig{}, + }, + test.CreateTmpl(t), + 
promslog.NewNopLogger(), + ) + require.NoError(t, err) + + test.AssertNotifyLeaksNoSecret(ctx, t, notifier, u.String()) +} + +func TestIncidentIOTruncateAlerts(t *testing.T) { + alerts := make([]*types.Alert, 10) + + truncatedAlerts, numTruncated := truncateAlerts(0, alerts) + require.Len(t, truncatedAlerts, 10) + require.EqualValues(t, 0, numTruncated) + + truncatedAlerts, numTruncated = truncateAlerts(4, alerts) + require.Len(t, truncatedAlerts, 4) + require.EqualValues(t, 6, numTruncated) + + truncatedAlerts, numTruncated = truncateAlerts(100, alerts) + require.Len(t, truncatedAlerts, 10) + require.EqualValues(t, 0, numTruncated) +} + +func TestIncidentIONotify(t *testing.T) { + // Test regular notifications are correctly sent + server := httptest.NewServer(http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + // Verify the content type header + contentType := r.Header.Get("Content-Type") + require.Equal(t, "application/json", contentType) + + // Decode the webhook payload + var msg Message + require.NoError(t, json.NewDecoder(r.Body).Decode(&msg)) + + // Verify required fields + require.Equal(t, "4", msg.Version) + require.NotEmpty(t, msg.GroupKey) + w.WriteHeader(http.StatusOK) + }, + )) + defer server.Close() + + u, err := url.Parse(server.URL) + require.NoError(t, err) + + notifier, err := New( + &config.IncidentioConfig{ + URL: &config.SecretURL{URL: u}, + HTTPConfig: &commoncfg.HTTPClientConfig{}, + }, + test.CreateTmpl(t), + promslog.NewNopLogger(), + ) + require.NoError(t, err) + + ctx := context.Background() + ctx = notify.WithGroupKey(ctx, "1") + + alert := &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{ + "alertname": "TestAlert", + "severity": "critical", + }, + StartsAt: time.Now(), + EndsAt: time.Now().Add(time.Hour), + }, + } + + retry, err := notifier.Notify(ctx, alert) + require.NoError(t, err) + require.False(t, retry) +} + +func TestIncidentIORetryScenarios(t *testing.T) { + testCases := []struct { + name string + 
statusCode int + responseBody []byte + expectRetry bool + expectErrorMsgContains string + }{ + { + name: "success response", + statusCode: http.StatusOK, + responseBody: []byte(`{"status":"success"}`), + expectRetry: false, + expectErrorMsgContains: "", + }, + { + name: "rate limit response", + statusCode: http.StatusTooManyRequests, + responseBody: []byte(`{"error":"rate limit exceeded","message":"Too many requests"}`), + expectRetry: true, + expectErrorMsgContains: "rate limit exceeded", + }, + { + name: "server error response", + statusCode: http.StatusInternalServerError, + responseBody: []byte(`{"error":"internal error"}`), + expectRetry: true, + expectErrorMsgContains: "internal error", + }, + { + name: "client error response", + statusCode: http.StatusBadRequest, + responseBody: []byte(`{"error":"invalid request","message":"Invalid payload format"}`), + expectRetry: false, + expectErrorMsgContains: "invalid request", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(tc.statusCode) + w.Write(tc.responseBody) + }, + )) + defer server.Close() + + u, err := url.Parse(server.URL) + require.NoError(t, err) + + notifier, err := New( + &config.IncidentioConfig{ + URL: &config.SecretURL{URL: u}, + HTTPConfig: &commoncfg.HTTPClientConfig{}, + }, + test.CreateTmpl(t), + promslog.NewNopLogger(), + ) + require.NoError(t, err) + + ctx := context.Background() + ctx = notify.WithGroupKey(ctx, "1") + + alert := &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{ + "alertname": "TestAlert", + "severity": "critical", + }, + StartsAt: time.Now(), + EndsAt: time.Now().Add(time.Hour), + }, + } + + retry, err := notifier.Notify(ctx, alert) + if tc.expectErrorMsgContains == "" { + require.NoError(t, err) + } else { + require.Error(t, err) + require.Contains(t, err.Error(), tc.expectErrorMsgContains) + } + require.Equal(t, 
tc.expectRetry, retry) + }) + } +} + +func TestIncidentIOErrDetails(t *testing.T) { + for _, tc := range []struct { + name string + status int + body io.Reader + expect string + }{ + { + name: "empty body", + status: http.StatusBadRequest, + body: nil, + expect: "", + }, + { + name: "single error field", + status: http.StatusBadRequest, + body: bytes.NewBufferString(`{"error":"Invalid request"}`), + expect: "Invalid request", + }, + { + name: "message and errors", + status: http.StatusBadRequest, + body: bytes.NewBufferString(`{"message":"Validation failed","errors":["Field is required","Value too long"]}`), + expect: "Validation failed: Field is required, Value too long", + }, + { + name: "message and error", + status: http.StatusTooManyRequests, + body: bytes.NewBufferString(`{"message":"Too many requests","error":"Rate limit exceeded"}`), + expect: "Too many requests: Rate limit exceeded", + }, + { + name: "invalid JSON", + status: http.StatusBadRequest, + body: bytes.NewBufferString(`{invalid}`), + expect: "", + }, + } { + t.Run(tc.name, func(t *testing.T) { + result := errDetails(tc.status, tc.body) + if tc.expect == "" { + require.Equal(t, "", result) + } else { + require.Contains(t, result, tc.expect) + } + }) + } +} diff --git a/notify/notify.go b/notify/notify.go index 3973e7876b..6ab54a7ba2 100644 --- a/notify/notify.go +++ b/notify/notify.go @@ -365,6 +365,7 @@ func (m *Metrics) InitializeFor(receiver map[string][]Integration) { "webex", "msteams", "msteamsv2", + "incidentio", "jira", "rocketchat", } { From 07604e33a53bcaf410ae318906e5095474213904 Mon Sep 17 00:00:00 2001 From: Rory Malcolm Date: Tue, 6 May 2025 23:44:00 +0100 Subject: [PATCH 2/2] Address review comments - Made a change to demarcate `AlertSourceToken` as required - Now error if an authorization header is set, and a alert_source_token is set - Ensure the alert source endpoint is no longer secret - Add documentation for the incidentio_config Signed-off-by: Rory Malcolm --- 
config/notifiers.go | 14 ++++++++-- docs/configuration.md | 36 ++++++++++++++++++++++++ notify/incidentio/incidentio.go | 41 ++++++++++++++-------------- notify/incidentio/incidentio_test.go | 27 ++++++++++-------- 4 files changed, 84 insertions(+), 34 deletions(-) diff --git a/config/notifiers.go b/config/notifiers.go index 0a94a0ae7c..cfe148f1e0 100644 --- a/config/notifiers.go +++ b/config/notifiers.go @@ -535,8 +535,8 @@ type IncidentioConfig struct { HTTPConfig *commoncfg.HTTPClientConfig `yaml:"http_config,omitempty" json:"http_config,omitempty"` // URL to send POST request to. - URL *SecretURL `yaml:"url" json:"url"` - URLFile string `yaml:"url_file" json:"url_file"` + URL *URL `yaml:"url" json:"url"` + URLFile string `yaml:"url_file" json:"url_file"` // AlertSourceToken is the key used to authenticate with the alert source in incident.io. AlertSourceToken Secret `yaml:"alert_source_token,omitempty" json:"alert_source_token,omitempty"` @@ -544,7 +544,9 @@ type IncidentioConfig struct { // MaxAlerts is the maximum number of alerts to be sent per incident.io message. // Alerts exceeding this threshold will be truncated. Setting this to 0 - // allows an unlimited number of alerts. + // allows an unlimited number of alerts. Note that if the payload exceeds + // incident.io's size limits, you will receive a 429 response and alerts + // will not be ingested. MaxAlerts uint64 `yaml:"max_alerts" json:"max_alerts"` // Timeout is the maximum time allowed to invoke incident.io. 
Setting this to 0 @@ -568,6 +570,12 @@ func (c *IncidentioConfig) UnmarshalYAML(unmarshal func(interface{}) error) erro if c.AlertSourceToken != "" && c.AlertSourceTokenFile != "" { return errors.New("at most one of alert_source_token & alert_source_token_file must be configured") } + if c.AlertSourceToken == "" && c.AlertSourceTokenFile == "" { + return errors.New("one of alert_source_token or alert_source_token_file must be configured") + } + if c.HTTPConfig != nil && c.HTTPConfig.Authorization != nil && (c.AlertSourceToken != "" || c.AlertSourceTokenFile != "") { + return errors.New("cannot specify both alert_source_token/alert_source_token_file and http_config.authorization") + } return nil } diff --git a/docs/configuration.md b/docs/configuration.md index 731437f765..ee1e047815 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -748,6 +748,8 @@ opsgenie_configs: [ - <opsgenie_config>, ... ] pagerduty_configs: [ - <pagerduty_config>, ... ] +incidentio_configs: + [ - <incidentio_config>, ... ] pushover_configs: [ - <pushover_config>, ... ] rocketchat_configs: @@ -1673,6 +1675,40 @@ There is a list of [integrations](https://prometheus.io/docs/operating/integrations/#alertmanager-webhook-receiver) with this feature. +### `<incidentio_config>` + +incident.io notifications are sent via the [incident.io Alert Sources API](https://incident.io/docs/api/alert-sources). + +```yaml +# Whether to notify about resolved alerts. +[ send_resolved: <boolean> | default = true ] + +# The HTTP client's configuration. +[ http_config: <http_config> | default = global.http_config ] + +# The URL to send the incident.io alert. This would typically be provided by the +# incident.io team when setting up an alert source. +# url and url_file are mutually exclusive; exactly one must be set. +url: <string> +url_file: <filepath> + +# The alert source token is used to authenticate with incident.io. +# alert_source_token and alert_source_token_file are mutually exclusive; exactly one must be set. +[ alert_source_token: <secret> ] +[ alert_source_token_file: <filepath> ] + +# The maximum number of alerts to be sent per incident.io message.
+# Alerts exceeding this threshold will be truncated. Setting this to 0 +# allows an unlimited number of alerts. Note that if the payload exceeds +# incident.io's size limits, you will receive a 429 response and alerts +# will not be ingested. +[ max_alerts: <int> | default = 0 ] + +# Timeout is the maximum time allowed to invoke incident.io. Setting this to 0 +# does not impose a timeout. +[ timeout: <duration> | default = 0s ] +``` + ### `<wechat_config>` WeChat notifications are sent via the [WeChat diff --git a/notify/incidentio/incidentio.go b/notify/incidentio/incidentio.go index 39f1db6cc0..cf0d347c4f 100644 --- a/notify/incidentio/incidentio.go +++ b/notify/incidentio/incidentio.go @@ -17,6 +17,7 @@ import ( "bytes" "context" "encoding/json" + "errors" "fmt" "io" "log/slog" @@ -43,17 +44,26 @@ type Notifier struct { // New returns a new incident.io notifier. func New(conf *config.IncidentioConfig, t *template.Template, l *slog.Logger, httpOpts ...commoncfg.HTTPClientOption) (*Notifier, error) { - // If alert source token is specified, set authorization in HTTP config + // Handle authentication configuration if conf.HTTPConfig == nil { conf.HTTPConfig = &commoncfg.HTTPClientConfig{} } + // Ensure one of AlertSourceToken or AlertSourceTokenFile is provided + if conf.AlertSourceToken == "" && conf.AlertSourceTokenFile == "" { + return nil, errors.New("one of alert_source_token or alert_source_token_file must be configured") + } + + // Error if authorization is already set in HTTPConfig + if conf.HTTPConfig.Authorization != nil { + return nil, errors.New("cannot specify both alert_source_token/alert_source_token_file and http_config.authorization") + } + + // Set authorization from token or token file if conf.AlertSourceToken != "" { - if conf.HTTPConfig.Authorization == nil { - conf.HTTPConfig.Authorization = &commoncfg.Authorization{ - Type: "Bearer", - Credentials: commoncfg.Secret(conf.AlertSourceToken), - } + conf.HTTPConfig.Authorization = &commoncfg.Authorization{ + Type: "Bearer", +
Credentials: commoncfg.Secret(conf.AlertSourceToken), } } else if conf.AlertSourceTokenFile != "" { content, err := os.ReadFile(conf.AlertSourceTokenFile) @@ -61,11 +71,9 @@ func New(conf *config.IncidentioConfig, t *template.Template, l *slog.Logger, ht return nil, fmt.Errorf("failed to read alert_source_token_file: %w", err) } - if conf.HTTPConfig.Authorization == nil { - conf.HTTPConfig.Authorization = &commoncfg.Authorization{ - Type: "Bearer", - Credentials: commoncfg.Secret(strings.TrimSpace(string(content))), - } + conf.HTTPConfig.Authorization = &commoncfg.Authorization{ + Type: "Bearer", + Credentials: commoncfg.Secret(strings.TrimSpace(string(content))), } } @@ -83,10 +91,6 @@ func New(conf *config.IncidentioConfig, t *template.Template, l *slog.Logger, ht retrier: &notify.Retrier{ RetryCodes: []int{ http.StatusTooManyRequests, // 429 - http.StatusInternalServerError, - http.StatusBadGateway, - http.StatusServiceUnavailable, - http.StatusGatewayTimeout, }, CustomDetailsFunc: errDetails, }, @@ -124,7 +128,7 @@ func (n *Notifier) Notify(ctx context.Context, alerts ...*types.Alert) (bool, er n.logger.Debug("incident.io notification", "groupKey", groupKey) msg := &Message{ - Version: "4", + Version: "1", Data: data, GroupKey: groupKey.String(), TruncatedAlerts: numTruncated, @@ -197,8 +201,5 @@ func errDetails(status int, body io.Reader) string { parts = append(parts, strings.Join(errorResponse.Errors, ", ")) } - if len(parts) > 0 { - return strings.Join(parts, ": ") - } - return "" + return strings.Join(parts, ": ") } diff --git a/notify/incidentio/incidentio_test.go b/notify/incidentio/incidentio_test.go index 5456221322..730897b4f8 100644 --- a/notify/incidentio/incidentio_test.go +++ b/notify/incidentio/incidentio_test.go @@ -39,8 +39,9 @@ import ( func TestIncidentIORetry(t *testing.T) { notifier, err := New( &config.IncidentioConfig{ - URL: &config.SecretURL{URL: &url.URL{Scheme: "https", Host: "example.com"}}, - HTTPConfig: &commoncfg.HTTPClientConfig{},
+ URL: &config.URL{URL: &url.URL{Scheme: "https", Host: "example.com"}}, + HTTPConfig: &commoncfg.HTTPClientConfig{}, + AlertSourceToken: "test-token", }, test.CreateTmpl(t), promslog.NewNopLogger(), @@ -60,8 +61,9 @@ func TestIncidentIORedactedURL(t *testing.T) { notifier, err := New( &config.IncidentioConfig{ - URL: &config.SecretURL{URL: u}, - HTTPConfig: &commoncfg.HTTPClientConfig{}, + URL: &config.URL{URL: u}, + HTTPConfig: &commoncfg.HTTPClientConfig{}, + AlertSourceToken: "test-token", }, test.CreateTmpl(t), promslog.NewNopLogger(), @@ -82,8 +84,9 @@ func TestIncidentIOURLFromFile(t *testing.T) { notifier, err := New( &config.IncidentioConfig{ - URLFile: f.Name(), - HTTPConfig: &commoncfg.HTTPClientConfig{}, + URLFile: f.Name(), + HTTPConfig: &commoncfg.HTTPClientConfig{}, + AlertSourceToken: "test-token", }, test.CreateTmpl(t), promslog.NewNopLogger(), @@ -122,7 +125,7 @@ func TestIncidentIONotify(t *testing.T) { require.NoError(t, json.NewDecoder(r.Body).Decode(&msg)) // Verify required fields - require.Equal(t, "4", msg.Version) + require.Equal(t, "1", msg.Version) require.NotEmpty(t, msg.GroupKey) w.WriteHeader(http.StatusOK) }, @@ -134,8 +137,9 @@ func TestIncidentIONotify(t *testing.T) { notifier, err := New( &config.IncidentioConfig{ - URL: &config.SecretURL{URL: u}, - HTTPConfig: &commoncfg.HTTPClientConfig{}, + URL: &config.URL{URL: u}, + HTTPConfig: &commoncfg.HTTPClientConfig{}, + AlertSourceToken: "test-token", }, test.CreateTmpl(t), promslog.NewNopLogger(), @@ -214,8 +218,9 @@ func TestIncidentIORetryScenarios(t *testing.T) { notifier, err := New( &config.IncidentioConfig{ - URL: &config.SecretURL{URL: u}, - HTTPConfig: &commoncfg.HTTPClientConfig{}, + URL: &config.URL{URL: u}, + HTTPConfig: &commoncfg.HTTPClientConfig{}, + AlertSourceToken: "test-token", }, test.CreateTmpl(t), promslog.NewNopLogger(),