599 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			599 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package router
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"math"
 | |
| 	"sort"
 | |
| 	"strings"
 | |
| 	"sync"
 | |
| 	"time"
 | |
| 
 | |
| 	"github.com/micro/go-micro/network/router/table"
 | |
| 	"github.com/micro/go-micro/registry"
 | |
| )
 | |
| 
 | |
| const (
 | |
| 	// AdvertiseEventsTick is time interval in which the router advertises route updates
 | |
| 	AdvertiseEventsTick = 5 * time.Second
 | |
| 	// AdvertiseTableTick is time interval in which router advertises all routes found in routing table
 | |
| 	AdvertiseTableTick = 1 * time.Minute
 | |
| 	// AdvertSuppress is advert suppression threshold
 | |
| 	AdvertSuppress = 2000
 | |
| 	// AdvertRecover is advert recovery threshold
 | |
| 	AdvertRecover = 750
 | |
| 	// DefaultAdvertTTL is default advertisement TTL
 | |
| 	DefaultAdvertTTL = 1 * time.Minute
 | |
| 	// PenaltyDecay is the penalty decay
 | |
| 	PenaltyDecay = 1.15
 | |
| 	// Delete penalises route addition and deletion
 | |
| 	Delete = 1000
 | |
| 	// UpdatePenalty penalises route updates
 | |
| 	UpdatePenalty = 500
 | |
| )
 | |
| 
 | |
| // router provides default router implementation
 | |
| type router struct {
 | |
| 	// embed the table
 | |
| 	table.Table
 | |
| 	opts       Options
 | |
| 	status     Status
 | |
| 	exit       chan struct{}
 | |
| 	eventChan  chan *table.Event
 | |
| 	advertChan chan *Advert
 | |
| 	advertWg   *sync.WaitGroup
 | |
| 	wg         *sync.WaitGroup
 | |
| 	sync.RWMutex
 | |
| }
 | |
| 
 | |
| // newRouter creates a new router and returns it
 | |
| func newRouter(opts ...Option) Router {
 | |
| 	// get default options
 | |
| 	options := DefaultOptions()
 | |
| 
 | |
| 	// apply requested options
 | |
| 	for _, o := range opts {
 | |
| 		o(&options)
 | |
| 	}
 | |
| 
 | |
| 	return &router{
 | |
| 		Table:      options.Table,
 | |
| 		opts:       options,
 | |
| 		status:     Status{Error: nil, Code: Stopped},
 | |
| 		exit:       make(chan struct{}),
 | |
| 		eventChan:  make(chan *table.Event),
 | |
| 		advertChan: make(chan *Advert),
 | |
| 		advertWg:   &sync.WaitGroup{},
 | |
| 		wg:         &sync.WaitGroup{},
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // Init initializes router with given options
 | |
| func (r *router) Init(opts ...Option) error {
 | |
| 	for _, o := range opts {
 | |
| 		o(&r.opts)
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Options returns router options
 | |
| func (r *router) Options() Options {
 | |
| 	return r.opts
 | |
| }
 | |
| 
 | |
| // manageRoute applies route action on the routing table
 | |
| func (r *router) manageRoute(route table.Route, action string) error {
 | |
| 	switch action {
 | |
| 	case "create":
 | |
| 		if err := r.Create(route); err != nil && err != table.ErrDuplicateRoute {
 | |
| 			return fmt.Errorf("failed adding route for service %s: %s", route.Service, err)
 | |
| 		}
 | |
| 	case "update":
 | |
| 		if err := r.Update(route); err != nil && err != table.ErrDuplicateRoute {
 | |
| 			return fmt.Errorf("failed updating route for service %s: %s", route.Service, err)
 | |
| 		}
 | |
| 	case "delete":
 | |
| 		if err := r.Delete(route); err != nil && err != table.ErrRouteNotFound {
 | |
| 			return fmt.Errorf("failed deleting route for service %s: %s", route.Service, err)
 | |
| 		}
 | |
| 	default:
 | |
| 		return fmt.Errorf("failed to manage route for service %s. Unknown action: %s", route.Service, action)
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // manageServiceRoutes manages routes for a given service.
 | |
| // It returns error of the routing table action fails.
 | |
| func (r *router) manageServiceRoutes(service *registry.Service, action string) error {
 | |
| 	// action is the routing table action
 | |
| 	action = strings.ToLower(action)
 | |
| 
 | |
| 	// take route action on each service node
 | |
| 	for _, node := range service.Nodes {
 | |
| 		route := table.Route{
 | |
| 			Service: service.Name,
 | |
| 			Address: node.Address,
 | |
| 			Gateway: "",
 | |
| 			Network: r.opts.Network,
 | |
| 			Link:    table.DefaultLink,
 | |
| 			Metric:  table.DefaultLocalMetric,
 | |
| 		}
 | |
| 
 | |
| 		if err := r.manageRoute(route, action); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // manageRegistryRoutes manages routes for each service found in the registry.
 | |
| // It returns error if either the services failed to be listed or the routing table action fails.
 | |
| func (r *router) manageRegistryRoutes(reg registry.Registry, action string) error {
 | |
| 	services, err := reg.ListServices()
 | |
| 	if err != nil {
 | |
| 		return fmt.Errorf("failed listing services: %v", err)
 | |
| 	}
 | |
| 
 | |
| 	// add each service node as a separate route
 | |
| 	for _, service := range services {
 | |
| 		// get the service to retrieve all its info
 | |
| 		srvs, err := reg.GetService(service.Name)
 | |
| 		if err != nil {
 | |
| 			continue
 | |
| 		}
 | |
| 		// manage the routes for all returned services
 | |
| 		for _, srv := range srvs {
 | |
| 			if err := r.manageServiceRoutes(srv, action); err != nil {
 | |
| 				return err
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // watchServices watches services in given registry and updates the routing table accordingly.
 | |
| // It returns error if the service registry watcher stops or if the routing table can't be updated.
 | |
| func (r *router) watchServices(w registry.Watcher) error {
 | |
| 	// wait in the background for the router to stop
 | |
| 	// when the router stops, stop the watcher and exit
 | |
| 	r.wg.Add(1)
 | |
| 	go func() {
 | |
| 		defer r.wg.Done()
 | |
| 		<-r.exit
 | |
| 		w.Stop()
 | |
| 	}()
 | |
| 
 | |
| 	var watchErr error
 | |
| 
 | |
| 	for {
 | |
| 		res, err := w.Next()
 | |
| 		if err != nil {
 | |
| 			if err != registry.ErrWatcherStopped {
 | |
| 				watchErr = err
 | |
| 			}
 | |
| 			break
 | |
| 		}
 | |
| 
 | |
| 		if err := r.manageServiceRoutes(res.Service, res.Action); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return watchErr
 | |
| }
 | |
| 
 | |
| // watchTable watches routing table entries and either adds or deletes locally registered service to/from network registry
 | |
| // It returns error if the locally registered services either fails to be added/deleted to/from network registry.
 | |
| func (r *router) watchTable(w table.Watcher) error {
 | |
| 	// wait in the background for the router to stop
 | |
| 	// when the router stops, stop the watcher and exit
 | |
| 	r.wg.Add(1)
 | |
| 	go func() {
 | |
| 		defer r.wg.Done()
 | |
| 		<-r.exit
 | |
| 		w.Stop()
 | |
| 	}()
 | |
| 
 | |
| 	var watchErr error
 | |
| 
 | |
| 	for {
 | |
| 		event, err := w.Next()
 | |
| 		if err != nil {
 | |
| 			if err != table.ErrWatcherStopped {
 | |
| 				watchErr = err
 | |
| 			}
 | |
| 			break
 | |
| 		}
 | |
| 
 | |
| 		select {
 | |
| 		case <-r.exit:
 | |
| 			close(r.eventChan)
 | |
| 			return nil
 | |
| 		case r.eventChan <- event:
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// close event channel on error
 | |
| 	close(r.eventChan)
 | |
| 
 | |
| 	return watchErr
 | |
| }
 | |
| 
 | |
| // advertiseEvents advertises events to event subscribers
 | |
| func (r *router) advertiseEvents(advType AdvertType, events []*table.Event) {
 | |
| 	defer r.advertWg.Done()
 | |
| 
 | |
| 	a := &Advert{
 | |
| 		Id:        r.opts.Id,
 | |
| 		Type:      advType,
 | |
| 		TTL:       DefaultAdvertTTL,
 | |
| 		Timestamp: time.Now(),
 | |
| 		Events:    events,
 | |
| 	}
 | |
| 
 | |
| 	select {
 | |
| 	case r.advertChan <- a:
 | |
| 	case <-r.exit:
 | |
| 		return
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // advertiseTable advertises the whole routing table to the network
 | |
| func (r *router) advertiseTable() error {
 | |
| 	// create table advertisement ticker
 | |
| 	ticker := time.NewTicker(AdvertiseTableTick)
 | |
| 
 | |
| 	for {
 | |
| 		select {
 | |
| 		case <-ticker.C:
 | |
| 			// list routing table routes to announce
 | |
| 			routes, err := r.List()
 | |
| 			if err != nil {
 | |
| 				return fmt.Errorf("failed listing routes: %s", err)
 | |
| 			}
 | |
| 			// collect all the added routes before we attempt to add default gateway
 | |
| 			events := make([]*table.Event, len(routes))
 | |
| 			for i, route := range routes {
 | |
| 				event := &table.Event{
 | |
| 					Type:      table.Update,
 | |
| 					Timestamp: time.Now(),
 | |
| 					Route:     route,
 | |
| 				}
 | |
| 				events[i] = event
 | |
| 			}
 | |
| 
 | |
| 			// advertise all routes as Update events to subscribers
 | |
| 			if len(events) > 0 {
 | |
| 				go func() {
 | |
| 					r.advertWg.Add(1)
 | |
| 					r.advertiseEvents(Update, events)
 | |
| 				}()
 | |
| 			}
 | |
| 		case <-r.exit:
 | |
| 			return nil
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // isFlapping detects if the event is flapping based on the current and previous event status.
 | |
| func isFlapping(curr, prev *table.Event) bool {
 | |
| 	if curr.Type == table.Update && prev.Type == table.Update {
 | |
| 		return true
 | |
| 	}
 | |
| 
 | |
| 	if curr.Type == table.Create && prev.Type == table.Delete || curr.Type == table.Delete && prev.Type == table.Create {
 | |
| 		return true
 | |
| 	}
 | |
| 
 | |
| 	return false
 | |
| }
 | |
| 
 | |
| // advertEvent is a table event enriched with advertisement data
 | |
| type advertEvent struct {
 | |
| 	*table.Event
 | |
| 	// timestamp marks the time the event has been received
 | |
| 	timestamp time.Time
 | |
| 	// penalty is current event penalty
 | |
| 	penalty float64
 | |
| 	// isSuppressed flags if the event should be considered for flap detection
 | |
| 	isSuppressed bool
 | |
| 	// isFlapping marks the event as flapping event
 | |
| 	isFlapping bool
 | |
| }
 | |
| 
 | |
| // processEvents processes routing table events.
 | |
| // It suppresses unhealthy flapping events and advertises healthy events upstream.
 | |
| func (r *router) processEvents() error {
 | |
| 	// ticker to periodically scan event for advertising
 | |
| 	ticker := time.NewTicker(AdvertiseEventsTick)
 | |
| 	// eventMap is a map of advert events
 | |
| 	eventMap := make(map[uint64]*advertEvent)
 | |
| 
 | |
| 	for {
 | |
| 		select {
 | |
| 		case <-ticker.C:
 | |
| 			var events []*table.Event
 | |
| 			// collect all events which are not flapping
 | |
| 			// TODO: decay the events and update suppression
 | |
| 			for key, event := range eventMap {
 | |
| 				if !event.isFlapping && !event.isSuppressed {
 | |
| 					e := new(table.Event)
 | |
| 					*e = *event.Event
 | |
| 					events = append(events, e)
 | |
| 					// this deletes the advertised event from the map
 | |
| 					delete(eventMap, key)
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			// advertise all Update events to subscribers
 | |
| 			if len(events) > 0 {
 | |
| 				r.advertWg.Add(1)
 | |
| 				go r.advertiseEvents(Update, events)
 | |
| 			}
 | |
| 		case e := <-r.eventChan:
 | |
| 			// event timestamp
 | |
| 			now := time.Now()
 | |
| 			// if event is nil, continue
 | |
| 			if e == nil {
 | |
| 				continue
 | |
| 			}
 | |
| 
 | |
| 			// determine the event penalty
 | |
| 			var penalty float64
 | |
| 			switch e.Type {
 | |
| 			case table.Update:
 | |
| 				penalty = UpdatePenalty
 | |
| 			case table.Delete:
 | |
| 				penalty = Delete
 | |
| 			}
 | |
| 			// we use route hash as eventMap key
 | |
| 			hash := e.Route.Hash()
 | |
| 			event, ok := eventMap[hash]
 | |
| 			if !ok {
 | |
| 				event = &advertEvent{
 | |
| 					Event:     e,
 | |
| 					penalty:   penalty,
 | |
| 					timestamp: time.Now(),
 | |
| 				}
 | |
| 				eventMap[hash] = event
 | |
| 				continue
 | |
| 			}
 | |
| 			// update penalty for existing event: decay existing and add new penalty
 | |
| 			delta := time.Since(event.timestamp).Seconds()
 | |
| 			event.penalty = event.penalty*math.Exp(-delta) + penalty
 | |
| 			event.timestamp = now
 | |
| 
 | |
| 			// suppress or recover the event based on its current penalty
 | |
| 			if !event.isSuppressed && event.penalty > AdvertSuppress {
 | |
| 				event.isSuppressed = true
 | |
| 			} else if event.penalty < AdvertRecover {
 | |
| 				event.isSuppressed = false
 | |
| 			}
 | |
| 			// if not suppressed decide if if its flapping
 | |
| 			if !event.isSuppressed {
 | |
| 				// detect if its flapping by comparing current and previous event
 | |
| 				event.isFlapping = isFlapping(e, event.Event)
 | |
| 			}
 | |
| 		case <-r.exit:
 | |
| 			// first wait for the advertiser to finish
 | |
| 			r.advertWg.Wait()
 | |
| 			// close the advert channel
 | |
| 			close(r.advertChan)
 | |
| 			return nil
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// we probably never reach this code path
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // watchErrors watches router errors and takes appropriate actions
 | |
| func (r *router) watchErrors(errChan <-chan error) {
 | |
| 	defer r.wg.Done()
 | |
| 
 | |
| 	var code StatusCode
 | |
| 	var err error
 | |
| 
 | |
| 	select {
 | |
| 	case <-r.exit:
 | |
| 		code = Stopped
 | |
| 	case err = <-errChan:
 | |
| 		code = Error
 | |
| 	}
 | |
| 
 | |
| 	r.Lock()
 | |
| 	defer r.Unlock()
 | |
| 	status := Status{
 | |
| 		Code:  code,
 | |
| 		Error: err,
 | |
| 	}
 | |
| 	r.status = status
 | |
| 
 | |
| 	// stop the router if some error happened
 | |
| 	if err != nil && code != Stopped {
 | |
| 		// this will stop watchers which will close r.advertChan
 | |
| 		close(r.exit)
 | |
| 		// drain the advertise channel
 | |
| 		for range r.advertChan {
 | |
| 		}
 | |
| 		// drain the event channel
 | |
| 		for range r.eventChan {
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| }
 | |
| 
 | |
| // Advertise advertises the routes to the network.
 | |
| // It returns error if any of the launched goroutines fail with error.
 | |
| func (r *router) Advertise() (<-chan *Advert, error) {
 | |
| 	r.Lock()
 | |
| 	defer r.Unlock()
 | |
| 
 | |
| 	if r.status.Code != Running {
 | |
| 		// add all local service routes into the routing table
 | |
| 		if err := r.manageRegistryRoutes(r.opts.Registry, "create"); err != nil {
 | |
| 			return nil, fmt.Errorf("failed adding routes: %s", err)
 | |
| 		}
 | |
| 
 | |
| 		// list routing table routes to announce
 | |
| 		routes, err := r.List()
 | |
| 		if err != nil {
 | |
| 			return nil, fmt.Errorf("failed listing routes: %s", err)
 | |
| 		}
 | |
| 		// collect all the added routes before we attempt to add default gateway
 | |
| 		events := make([]*table.Event, len(routes))
 | |
| 		for i, route := range routes {
 | |
| 			event := &table.Event{
 | |
| 				Type:      table.Create,
 | |
| 				Timestamp: time.Now(),
 | |
| 				Route:     route,
 | |
| 			}
 | |
| 			events[i] = event
 | |
| 		}
 | |
| 
 | |
| 		// add default gateway into routing table
 | |
| 		if r.opts.Gateway != "" {
 | |
| 			// note, the only non-default value is the gateway
 | |
| 			route := table.Route{
 | |
| 				Service: "*",
 | |
| 				Address: "*",
 | |
| 				Gateway: r.opts.Gateway,
 | |
| 				Network: "*",
 | |
| 				Metric:  table.DefaultLocalMetric,
 | |
| 			}
 | |
| 			if err := r.Create(route); err != nil {
 | |
| 				return nil, fmt.Errorf("failed adding default gateway route: %s", err)
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		// NOTE: we only need to recreate these if the router errored or was stopped
 | |
| 		// TODO: These probably dont need to be struct members
 | |
| 		if r.status.Code == Error || r.status.Code == Stopped {
 | |
| 			r.exit = make(chan struct{})
 | |
| 			r.eventChan = make(chan *table.Event)
 | |
| 			r.advertChan = make(chan *Advert)
 | |
| 		}
 | |
| 
 | |
| 		// routing table watcher
 | |
| 		tableWatcher, err := r.Watch()
 | |
| 		if err != nil {
 | |
| 			return nil, fmt.Errorf("failed creating routing table watcher: %v", err)
 | |
| 		}
 | |
| 		// service registry watcher
 | |
| 		svcWatcher, err := r.opts.Registry.Watch()
 | |
| 		if err != nil {
 | |
| 			return nil, fmt.Errorf("failed creating service registry watcher: %v", err)
 | |
| 		}
 | |
| 
 | |
| 		// error channel collecting goroutine errors
 | |
| 		errChan := make(chan error, 4)
 | |
| 
 | |
| 		r.wg.Add(1)
 | |
| 		go func() {
 | |
| 			defer r.wg.Done()
 | |
| 			// watch local registry and register routes in routine table
 | |
| 			errChan <- r.watchServices(svcWatcher)
 | |
| 		}()
 | |
| 
 | |
| 		r.wg.Add(1)
 | |
| 		go func() {
 | |
| 			defer r.wg.Done()
 | |
| 			// watch local registry and register routes in routing table
 | |
| 			errChan <- r.watchTable(tableWatcher)
 | |
| 		}()
 | |
| 
 | |
| 		r.wg.Add(1)
 | |
| 		go func() {
 | |
| 			defer r.wg.Done()
 | |
| 			// watch routing table events and process them
 | |
| 			errChan <- r.processEvents()
 | |
| 		}()
 | |
| 
 | |
| 		r.advertWg.Add(1)
 | |
| 		go func() {
 | |
| 			defer r.advertWg.Done()
 | |
| 			// advertise the whole routing table
 | |
| 			errChan <- r.advertiseTable()
 | |
| 		}()
 | |
| 
 | |
| 		// advertise your presence
 | |
| 		r.advertWg.Add(1)
 | |
| 		go r.advertiseEvents(Announce, events)
 | |
| 
 | |
| 		// watch for errors and cleanup
 | |
| 		r.wg.Add(1)
 | |
| 		go r.watchErrors(errChan)
 | |
| 
 | |
| 		// mark router as running and set its Error to nil
 | |
| 		r.status = Status{Code: Running, Error: nil}
 | |
| 	}
 | |
| 
 | |
| 	return r.advertChan, nil
 | |
| }
 | |
| 
 | |
| // Process updates the routing table using the advertised values
 | |
| func (r *router) Process(a *Advert) error {
 | |
| 	// NOTE: event sorting might not be necessary
 | |
| 	// copy update events intp new slices
 | |
| 	events := make([]*table.Event, len(a.Events))
 | |
| 	copy(events, a.Events)
 | |
| 	// sort events by timestamp
 | |
| 	sort.Slice(events, func(i, j int) bool {
 | |
| 		return events[i].Timestamp.Before(events[j].Timestamp)
 | |
| 	})
 | |
| 
 | |
| 	for _, event := range events {
 | |
| 		// create a copy of the route
 | |
| 		route := event.Route
 | |
| 		action := event.Type
 | |
| 		if err := r.manageRoute(route, fmt.Sprintf("%s", action)); err != nil {
 | |
| 			return fmt.Errorf("failed applying action %s to routing table: %s", action, err)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Status returns router status
 | |
| func (r *router) Status() Status {
 | |
| 	r.RLock()
 | |
| 	defer r.RUnlock()
 | |
| 
 | |
| 	// make a copy of the status
 | |
| 	status := r.status
 | |
| 
 | |
| 	return status
 | |
| }
 | |
| 
 | |
| // Stop stops the router
 | |
| func (r *router) Stop() error {
 | |
| 	r.RLock()
 | |
| 	// only close the channel if the router is running
 | |
| 	if r.status.Code == Running {
 | |
| 		// notify all goroutines to finish
 | |
| 		close(r.exit)
 | |
| 		// drain the advertise channel
 | |
| 		for range r.advertChan {
 | |
| 		}
 | |
| 		// drain the event channel
 | |
| 		for range r.eventChan {
 | |
| 		}
 | |
| 	}
 | |
| 	r.RUnlock()
 | |
| 
 | |
| 	// wait for all goroutines to finish
 | |
| 	r.wg.Wait()
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // String prints debugging information about router
 | |
| func (r *router) String() string {
 | |
| 	return "router"
 | |
| }
 |