Skip to content

Commit d2103a3

Browse files
authored
Add Multi-DC Support (#5)
* Add support for specifying multiple datacenters --------- Co-authored-by: dan.markhasin <[email protected]>
1 parent 61ec1cb commit d2103a3

13 files changed

+459
-109
lines changed

.golangci.yml

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@ linters-settings:
1010
- (github.com/golangci/golangci-lint/pkg/logutils.Log).Warnf
1111
- (github.com/golangci/golangci-lint/pkg/logutils.Log).Errorf
1212
- (github.com/golangci/golangci-lint/pkg/logutils.Log).Fatalf
13-
golint:
14-
min-confidence: 0
1513
gocyclo:
1614
min-complexity: 15
1715
maligned:
@@ -49,6 +47,7 @@ linters-settings:
4947
- ifElseChain
5048
- octalLiteral
5149
- wrapperFunc
50+
- whyNoLint
5251
funlen:
5352
lines: 100
5453
statements: 50
@@ -60,7 +59,6 @@ linters:
6059
enable:
6160
# - rowserrcheck
6261
- bodyclose
63-
- deadcode
6462
- depguard
6563
- dogsled
6664
- dupl
@@ -71,31 +69,21 @@ linters:
7169
- gocyclo
7270
- gofmt
7371
- goimports
74-
- golint
72+
- revive
7573
- gosec
7674
- gosimple
7775
- govet
7876
- ineffassign
79-
- interfacer
8077
- lll
8178
- misspell
8279
- nakedret
83-
- scopelint
80+
- exportloopref
8481
- staticcheck
85-
- structcheck
8682
- stylecheck
8783
- typecheck
8884
- unconvert
8985
- unparam
9086
- unused
91-
- varcheck
92-
93-
# don't enable:
94-
# - gochecknoglobals
95-
# - gocognit
96-
# - godox
97-
# - maligned
98-
# - prealloc
9987

10088
# golangci.com configuration
10189
# https://github.com/golangci/golangci/wiki/Configuration
@@ -105,5 +93,5 @@ service:
10593
- echo "here I can run custom commands, but no preparation needed for this repo"
10694

10795
run:
108-
skip-dirs:
109-
- internal/dto
96+
skip-files:
97+
- consul_test.go

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
LOCAL_GOPATH=${ROOT_DIR}/.gopath
22
BIN_DIR := .tools/bin
3-
GOLANGCI_LINT_VERSION := 1.40.1
3+
GOLANGCI_LINT_VERSION := 1.52.2
44
GOLANGCI_LINT := $(BIN_DIR)/golangci-lint_$(GOLANGCI_LINT_VERSION)
55

66
.PHONY: test

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,12 @@ If set to true, the transport will attempt to resolve the address by delegating
8585

8686
* TLS - in order to support TLS, you can provide a custom Base `http.Transport` with the `ServerName` in its `TLSClientConfig` set to the hostname presented by your certificate.
8787

88+
### Multi-DC Support
89+
The library supports multiple datacenters: you can specify a list of fallback datacenters to use.
90+
If no instances are available in the local datacenter, the library selects instances from the first fallback datacenter that has any, following the order given in the `FallbackDatacenters` field of the `ResolverConfig` struct.
91+
92+
93+
8894
# Example
8995

9096
```go

config.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,7 @@ type ResolverConfig struct {
6464
// The consul query options configuration
6565
// Optional
6666
Query *api.QueryOptions
67+
// A list of fallback datacenters to query when the local datacenter has no instances, ordered by priority.
68+
// Optional. Will use only the local DC if not provided.
69+
FallbackDatacenters []string
6770
}

consul.go

Lines changed: 117 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,28 @@ import (
55
"fmt"
66
"log"
77
"math"
8+
"reflect"
9+
"sort"
810
"sync"
911
"time"
1012

13+
"github.com/mitchellh/mapstructure"
14+
1115
"github.com/AppsFlyer/go-consul-resolver/lb"
1216
"github.com/cenkalti/backoff/v4"
1317
"github.com/friendsofgo/errors"
1418
"github.com/hashicorp/consul/api"
1519
"go.uber.org/ratelimit"
1620
)
1721

22+
// agentConfig holds the part of the agent's configuration this package reads:
// the value stored under the "Datacenter" key.
type agentConfig struct {
	DC string `mapstructure:"Datacenter"`
}
25+
26+
type agentSelf struct {
27+
Config agentConfig `mapstructure:"Config"`
28+
}
29+
1830
// Balancer interface provides methods for selecting a target and updating its state
1931
type Balancer interface {
2032
// Select returns a *api.ServiceEntry describing the selected target.
@@ -31,14 +43,16 @@ type ServiceProvider interface {
3143
}
3244

3345
type ServiceResolver struct {
34-
log LogFn
35-
ctx context.Context
36-
client ServiceProvider
37-
queryOpts *api.QueryOptions
38-
balancer Balancer
39-
spec ServiceSpec
40-
init chan struct{}
41-
initDone sync.Once
46+
log LogFn
47+
ctx context.Context
48+
client ServiceProvider
49+
queryOpts *api.QueryOptions
50+
balancer Balancer
51+
spec ServiceSpec
52+
prioritizedInstances [][]*api.ServiceEntry
53+
mu sync.Mutex
54+
init chan struct{}
55+
initDone sync.Once
4256
}
4357

4458
// NewConsulResolver creates a new Consul Resolver
@@ -69,18 +83,40 @@ func NewConsulResolver(ctx context.Context, conf ResolverConfig) (*ServiceResolv
6983
conf.Log = log.Printf
7084
}
7185

86+
datacenters := []string{""}
87+
if len(conf.FallbackDatacenters) > 0 {
88+
seen := map[string]struct{}{}
89+
// Exclude the local datacenter from the list of fallback datacenters
90+
localDC, err := getLocalDatacenter(conf.Client.Agent())
91+
if err != nil {
92+
return nil, errors.Wrap(err, "failed determining local consul datacenter")
93+
}
94+
95+
for _, dc := range conf.FallbackDatacenters {
96+
if _, ok := seen[dc]; ok || dc == localDC {
97+
continue
98+
}
99+
seen[dc] = struct{}{}
100+
datacenters = append(datacenters, dc)
101+
}
102+
}
103+
72104
resolver := &ServiceResolver{
73-
log: conf.Log,
74-
ctx: ctx,
75-
queryOpts: conf.Query,
76-
spec: conf.ServiceSpec,
77-
client: conf.Client.Health(),
78-
balancer: conf.Balancer,
79-
init: make(chan struct{}),
80-
initDone: sync.Once{},
105+
log: conf.Log,
106+
ctx: ctx,
107+
queryOpts: conf.Query,
108+
spec: conf.ServiceSpec,
109+
client: conf.Client.Health(),
110+
balancer: conf.Balancer,
111+
prioritizedInstances: make([][]*api.ServiceEntry, len(datacenters)),
112+
init: make(chan struct{}),
113+
initDone: sync.Once{},
81114
}
82115

83-
go resolver.populateFromConsul()
116+
// Start one watcher per datacenter; index 0 is the local datacenter (empty name), which has the highest priority
117+
for priority, dc := range datacenters {
118+
go resolver.populateFromConsul(dc, priority)
119+
}
84120

85121
return resolver, nil
86122
}
@@ -127,13 +163,16 @@ func (r *ServiceResolver) Resolve(ctx context.Context) (ServiceAddress, error) {
127163
return ServiceAddress{Host: host, Port: port}, nil
128164
}
129165

130-
func (r *ServiceResolver) populateFromConsul() {
166+
func (r *ServiceResolver) populateFromConsul(dcName string, dcPriority int) {
131167
rl := ratelimit.New(1) // limit consul queries to 1 per second
132168
bck := backoff.NewExponentialBackOff()
133169
bck.MaxElapsedTime = 0
134170
bck.MaxInterval = time.Second * 30
135171

136-
r.queryOpts.WaitIndex = 0
172+
q := *r.queryOpts
173+
174+
q.WaitIndex = 0
175+
q.Datacenter = dcName
137176
for r.ctx.Err() == nil {
138177
rl.Take()
139178
err := backoff.RetryNotify(
@@ -142,18 +181,21 @@ func (r *ServiceResolver) populateFromConsul() {
142181
r.spec.ServiceName,
143182
r.spec.Tags,
144183
!r.spec.IncludeUnhealthy,
145-
r.queryOpts,
184+
&q,
146185
)
147186
if err != nil {
148187
return err
149188
}
150-
if meta.LastIndex < r.queryOpts.WaitIndex {
151-
r.queryOpts.WaitIndex = 0
189+
if meta.LastIndex < q.WaitIndex {
190+
q.WaitIndex = 0
152191
} else {
153-
r.queryOpts.WaitIndex = uint64(math.Max(float64(1), float64(meta.LastIndex)))
192+
q.WaitIndex = uint64(math.Max(float64(1), float64(meta.LastIndex)))
193+
}
194+
195+
if targets, shouldUpdate := r.getTargetsForUpdate(se, dcPriority); shouldUpdate {
196+
r.balancer.UpdateTargets(targets)
154197
}
155198

156-
r.balancer.UpdateTargets(se)
157199
r.initDone.Do(func() {
158200
close(r.init)
159201
})
@@ -170,3 +212,54 @@ func (r *ServiceResolver) populateFromConsul() {
170212
}
171213
r.log("[Consul Resolver] context canceled, stopping consul watcher")
172214
}
215+
216+
// getTargetsForUpdate will update the LB only if:
217+
// - The DC has healthy nodes
218+
// - No DC with higher priority has healthy nodes
219+
func (r *ServiceResolver) getTargetsForUpdate(se []*api.ServiceEntry, priority int) ([]*api.ServiceEntry, bool) {
220+
sort.SliceStable(se, func(i, j int) bool {
221+
return se[i].Node.ID < se[j].Node.ID
222+
})
223+
224+
r.mu.Lock()
225+
defer r.mu.Unlock()
226+
227+
var found bool
228+
// check if the target list is unchanged
229+
if reflect.DeepEqual(se, r.prioritizedInstances[priority]) {
230+
return nil, false
231+
}
232+
r.prioritizedInstances[priority] = se
233+
for i := 0; i <= len(r.prioritizedInstances)-1; i++ {
234+
if len(r.prioritizedInstances[i]) == 0 {
235+
continue
236+
}
237+
found = true
238+
if priority > i {
239+
break
240+
}
241+
242+
return r.prioritizedInstances[i], true
243+
}
244+
245+
// If no DC has any nodes, return an empty slice and signal the caller that an update is needed
246+
if !found {
247+
return se, true
248+
}
249+
250+
return se, false
251+
}
252+
253+
func getLocalDatacenter(c *api.Agent) (string, error) {
254+
res, err := c.Self()
255+
if err != nil {
256+
return "", errors.Wrap(err, "failed querying agent")
257+
}
258+
259+
var self agentSelf
260+
if err := mapstructure.Decode(res, &self); err != nil {
261+
return "", errors.Wrap(err, "failed decoding agent configuration")
262+
}
263+
264+
return self.Config.DC, nil
265+
}

0 commit comments

Comments
 (0)