Monitor all services in the monitor

This commit is contained in:
Asim Aslam 2019-08-06 19:02:57 +01:00
parent bc751c55fb
commit eae32176c4
3 changed files with 158 additions and 51 deletions

View File

@ -20,6 +20,7 @@ type monitor struct {
client client.Client client client.Client
sync.RWMutex sync.RWMutex
running bool
services map[string]*Status services map[string]*Status
} }
@ -97,6 +98,124 @@ func (m *monitor) check(service string) (*Status, error) {
}, nil }, nil
} }
// reap drops every watched service that the registry no longer lists.
// If the registry listing fails, nothing is removed.
func (m *monitor) reap() {
	listed, err := m.registry.ListServices()
	if err != nil {
		return
	}

	// set of service names currently present in the registry
	registered := make(map[string]struct{}, len(listed))
	for _, svc := range listed {
		registered[svc.Name] = struct{}{}
	}

	m.Lock()
	defer m.Unlock()

	// walk the status map and forget anything the registry does not know
	for name := range m.services {
		if _, ok := registered[name]; ok {
			continue
		}
		delete(m.services, name)
	}
}
// run is the monitor's main loop. It seeds a queue with every service the
// registry lists at start-up, then reacts to four events until m.exit is
// signalled: a queued service name (check it and store the status), the
// minute ticker (re-queue all watched services and watch newly listed ones),
// the hour ticker (reap services that left the registry), and exit.
func (m *monitor) run() {
	// status checks are scheduled once a minute
	checkTicker := time.NewTicker(time.Minute)
	defer checkTicker.Stop()

	// dead services are reaped once an hour
	reapTicker := time.NewTicker(time.Hour)
	defer reapTicker.Stop()

	// the listing error is ignored here: a failed listing just means an
	// empty initial queue
	known, _ := m.registry.ListServices()

	// the queue capacity is fixed to the number of services known at start-up
	queue := make(chan string, len(known))
	for i := range known {
		queue <- known[i].Name
	}

	for {
		select {
		case <-m.exit:
			// told to stop
			return

		case name := <-queue:
			// a failed check is recorded as an unknown status
			result, err := m.check(name)
			if err != nil {
				result = &Status{Code: StatusUnknown, Info: "unknown status"}
			}

			m.Lock()
			m.services[name] = result
			m.Unlock()

		case <-checkTicker.C:
			// snapshot the watched names so the goroutine below never
			// touches the shared map
			watched := make(map[string]bool)
			m.RLock()
			for name := range m.services {
				watched[name] = true
			}
			m.RUnlock()

			go func() {
				// queue a check for each watched service
				for name := range watched {
					select {
					case <-m.exit:
						return
					case queue <- name:
					default:
						// queue is full: this service's check is skipped
						// for this tick
					}
				}

				// start watching anything the registry lists that is not
				// watched yet; listing and watch errors are ignored
				listed, _ := m.registry.ListServices()
				for _, svc := range listed {
					if watched[svc.Name] {
						continue
					}
					m.Watch(svc.Name)
				}
			}()

		case <-reapTicker.C:
			// forget services that are no longer in the registry
			m.reap()
		}
	}
}
// Reap stops monitoring the named service and deregisters every instance of
// it that the registry returns.
//
// A failed registry lookup is treated as "nothing to reap": Reap returns nil
// and leaves the status map untouched. Once the lookup succeeds, the service
// is removed from the status map and Deregister is attempted for every
// instance; the first Deregister error, if any, is returned.
func (m *monitor) Reap(service string) error {
	instances, err := m.registry.GetService(service)
	if err != nil {
		// best effort: a lookup failure is not reported to the caller
		return nil
	}

	// Only the map mutation needs the write lock. Holding it across the
	// Deregister calls would block Status readers and the run loop's status
	// writes for as long as those calls take.
	m.Lock()
	delete(m.services, service)
	reg := m.registry
	m.Unlock()

	// try every instance even if one fails, and report the first failure
	var firstErr error
	for _, instance := range instances {
		if err := reg.Deregister(instance); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
func (m *monitor) Status(service string) (Status, error) { func (m *monitor) Status(service string) (Status, error) {
m.RLock() m.RLock()
defer m.RUnlock() defer m.RUnlock()
@ -126,10 +245,36 @@ func (m *monitor) Watch(service string) error {
return nil return nil
} }
// Run starts the monitor's background loop. Calling it while the monitor is
// already running is a no-op. It always returns nil.
func (m *monitor) Run() error {
	m.Lock()
	defer m.Unlock()

	if !m.running {
		// a fresh exit channel and registry cache are installed before the
		// loop goroutine is launched
		m.exit = make(chan bool)
		m.registry = cache.New(m.options.Registry)

		// launch the loop, then mark the monitor as running
		go m.run()
		m.running = true
	}

	return nil
}
func (m *monitor) Stop() error { func (m *monitor) Stop() error {
m.Lock() m.Lock()
defer m.Unlock() defer m.Unlock()
if !m.running {
return nil
}
select { select {
case <-m.exit: case <-m.exit:
return nil return nil
@ -139,58 +284,13 @@ func (m *monitor) Stop() error {
delete(m.services, s) delete(m.services, s)
} }
m.registry.Stop() m.registry.Stop()
m.running = false
return nil return nil
} }
return nil return nil
} }
// run is the monitor's background loop: once a minute it checks every watched
// service and stores the resulting status, until m.exit is signalled.
//
// Checks are performed inline on each tick. Feeding service names through an
// unbuffered channel from inside the loop that is also that channel's only
// receiver cannot work: the send blocks forever because nobody is left to
// receive, so the first tick with a watched service would hang the loop.
func (m *monitor) run() {
	// check the status every tick
	t := time.NewTicker(time.Minute)
	defer t.Stop()

	// refresh checks one service and records its status; a failed check is
	// recorded as StatusUnknown
	refresh := func(service string) {
		status, err := m.check(service)
		if err != nil {
			status = &Status{
				Code: StatusUnknown,
				Info: "unknown status",
			}
		}

		// save the status
		m.Lock()
		m.services[service] = status
		m.Unlock()
	}

	for {
		select {
		case <-m.exit:
			return
		case <-t.C:
			// snapshot the watched names so the lock is not held while
			// the checks run
			m.RLock()
			services := make([]string, 0, len(m.services))
			for service := range m.services {
				services = append(services, service)
			}
			m.RUnlock()

			// check the status of all watched services
			for _, service := range services {
				// stop between checks if asked to exit mid-sweep
				select {
				case <-m.exit:
					return
				default:
				}
				refresh(service)
			}
		}
	}
}
func newMonitor(opts ...Option) Monitor { func newMonitor(opts ...Option) Monitor {
options := Options{ options := Options{
Client: client.DefaultClient, Client: client.DefaultClient,
@ -201,14 +301,11 @@ func newMonitor(opts ...Option) Monitor {
o(&options) o(&options)
} }
m := &monitor{ return &monitor{
options: options, options: options,
exit: make(chan bool), exit: make(chan bool),
client: options.Client, client: options.Client,
registry: cache.New(options.Registry), registry: cache.New(options.Registry),
services: make(map[string]*Status), services: make(map[string]*Status),
} }
go m.run()
return m
} }

View File

@ -8,6 +8,10 @@ func TestMonitor(t *testing.T) {
// create new monitor // create new monitor
m := NewMonitor() m := NewMonitor()
// start the monitor; the failure message names the operation that failed
if err := m.Run(); err != nil {
	t.Fatalf("failed to run monitor: %v", err)
}
services := []string{"foo", "bar", "baz"} services := []string{"foo", "bar", "baz"}
for _, service := range services { for _, service := range services {
@ -27,5 +31,7 @@ func TestMonitor(t *testing.T) {
} }
// stop monitor // stop monitor
m.Stop() if err := m.Stop(); err != nil {
t.Fatalf("failed to stop monitor: %v", err)
}
} }

View File

@ -15,10 +15,14 @@ type StatusCode int
// Monitor monitors a service and reaps dead instances // Monitor monitors a service and reaps dead instances
type Monitor interface { type Monitor interface {
// Reap a service and stop monitoring
Reap(service string) error
// Status of the service // Status of the service
Status(service string) (Status, error) Status(service string) (Status, error)
// Watch starts watching the service // Watch starts watching the service
Watch(service string) error Watch(service string) error
// Run the monitor to watch all services
Run() error
// Stop monitoring // Stop monitoring
Stop() error Stop() error
} }