Add option to enable TCP check with Consul registry

One disadvantage of using TTL-based health checks is the high network
traffic between Consul agents (either between servers, or between server
and client).

In order for services to be considered alive by Consul, microservices must
send a TTL update to Consul every n seconds (currently 30 seconds).

Here is the explanation of TTL checks from the Consul documentation [1]:

    Time to Live (TTL) - These checks retain their last known state for a
    given TTL. The state of the check must be updated periodically over
    the HTTP interface. If an external system fails to update the status
    within a given TTL, the check is set to the failed state. This
    mechanism, conceptually similar to a dead man's switch, relies on the
    application to directly report its health. For example, a healthy app
    can periodically PUT a status update to the HTTP endpoint; if the app
    fails, the TTL will expire and the health check enters a critical
    state. The endpoints used to update health information for a given
    check are the pass endpoint and the fail endpoint. TTL checks also
    persist their last known status to disk. This allows the Consul agent
    to restore the last known status of the check across restarts.
    Persisted check status is valid through the end of the TTL from the
    time of the last check.


Hint:

    TTL checks also persist their last known status to disk. This allows
    the Consul agent to restore the last known status of the check
    across restarts.

When microservices update the TTL, Consul writes to disk. Writing to
disk means all other slaves need to replicate it, which means the master
needs to inform the other standby Consul servers to pull the new catalog.
Hence the increased traffic.

More information about this issue can be found on the Consul mailing list [2].

[1] https://www.consul.io/docs/agent/checks.html
[2] https://groups.google.com/forum/#!topic/consul-tool/84h7qmCCpjg
This commit is contained in:
Shulhan 2018-03-14 18:51:38 +07:00
parent a941a4772b
commit 1599d717af
4 changed files with 68 additions and 11 deletions

View File

@ -115,6 +115,19 @@ func (c *consulRegistry) Deregister(s *Service) error {
return c.Client.Agent().ServiceDeregister(node.Id)
}
// getDeregisterTTL computes the duration used for a check's
// DeregisterCriticalServiceAfter setting, given the check period t (a TTL
// or a polling interval).
//
// The result is t plus a 5 second splay. Consul has a minimum timeout on
// deregistration of 1 minute, so any t below one minute yields one minute
// plus the splay instead.
func getDeregisterTTL(t time.Duration) time.Duration {
	const (
		// splay gives the watcher a little slack on top of the period.
		splay = 5 * time.Second
		// minTimeout is the floor applied before the splay is added.
		minTimeout = time.Minute
	)

	if t < minTimeout {
		return minTimeout + splay
	}
	return t + splay
}
func (c *consulRegistry) Register(s *Service, opts ...RegisterOption) error {
if len(s.Nodes) == 0 {
return errors.New("Require at least one node")
@ -155,16 +168,19 @@ func (c *consulRegistry) Register(s *Service, opts ...RegisterOption) error {
var check *consul.AgentServiceCheck
// if the TTL is greater than 0 create an associated check
if options.TTL > time.Duration(0) {
// splay slightly for the watcher?
splay := time.Second * 5
deregTTL := options.TTL + splay
// consul has a minimum timeout on deregistration of 1 minute.
if options.TTL < time.Minute {
deregTTL = time.Minute + splay
if options.TCPCheck {
deregTTL := getDeregisterTTL(options.Interval)
check = &consul.AgentServiceCheck{
TCP: c.Address,
Interval: fmt.Sprintf("%v", options.Interval),
DeregisterCriticalServiceAfter: fmt.Sprintf("%v", deregTTL),
}
// if the TTL is greater than 0 create an associated check
} else if options.TTL > time.Duration(0) {
deregTTL := getDeregisterTTL(options.TTL)
check = &consul.AgentServiceCheck{
TTL: fmt.Sprintf("%v", options.TTL),
DeregisterCriticalServiceAfter: fmt.Sprintf("%v", deregTTL),

View File

@ -18,7 +18,9 @@ type Options struct {
}
type RegisterOptions struct {
TTL time.Duration
TCPCheck bool
TTL time.Duration
Interval time.Duration
// Other options for implementations of the interface
// can be stored in a context
Context context.Context
@ -66,6 +68,23 @@ func RegisterTTL(t time.Duration) RegisterOption {
}
}
// RegisterTCPCheck tells the service provider to check the service address
// and port every t. The option takes effect only when t is greater than 0;
// otherwise the register options are left untouched.
//
// This option is meant for the Consul registry; see "TCP + Interval" in the
// Consul checks documentation for more information:
// https://www.consul.io/docs/agent/checks.html
func RegisterTCPCheck(t time.Duration) RegisterOption {
	return func(o *RegisterOptions) {
		// A non-positive interval means the TCP check was not requested.
		if t <= 0 {
			return
		}
		o.Interval = t
		o.TCPCheck = true
	}
}
// Watch a service
func WatchService(name string) WatchOption {
return func(o *WatchOptions) {

View File

@ -25,7 +25,9 @@ type Options struct {
HdlrWrappers []HandlerWrapper
SubWrappers []SubscriberWrapper
RegisterTTL time.Duration
RegisterTCPCheck bool
RegisterTTL time.Duration
RegisterInterval time.Duration
// Debug Handler which can be set by a user
DebugHandler debug.DebugHandler
@ -164,6 +166,23 @@ func RegisterTTL(t time.Duration) Option {
}
}
// RegisterTCPCheck tells the service provider to check the service address
// and port every t. The option takes effect only when t is greater than 0;
// otherwise the server options are left untouched.
//
// This option is meant for the Consul registry; see "TCP + Interval" in the
// Consul checks documentation for more information:
// https://www.consul.io/docs/agent/checks.html
func RegisterTCPCheck(t time.Duration) Option {
	return func(o *Options) {
		// A non-positive interval means the TCP check was not requested.
		if t <= 0 {
			return
		}
		o.RegisterInterval = t
		o.RegisterTCPCheck = true
	}
}
// Wait tells the server to wait for requests to finish before exiting
func Wait(b bool) Option {
return func(o *Options) {

View File

@ -278,7 +278,10 @@ func (s *rpcServer) Register() error {
}
// create registry options
rOpts := []registry.RegisterOption{registry.RegisterTTL(config.RegisterTTL)}
rOpts := []registry.RegisterOption{
registry.RegisterTTL(config.RegisterTTL),
registry.RegisterTCPCheck(config.RegisterInterval),
}
if err := config.Registry.Register(service, rOpts...); err != nil {
return err