2019-06-06 18:37:40 +03:00
|
|
|
package router
|
|
|
|
|
|
|
|
import (
|
2019-06-10 01:09:38 +03:00
|
|
|
"fmt"
|
2019-07-05 21:15:32 +03:00
|
|
|
"math"
|
2019-07-03 21:50:07 +03:00
|
|
|
"sort"
|
2019-06-10 01:09:38 +03:00
|
|
|
"strings"
|
2019-06-12 01:59:25 +03:00
|
|
|
"sync"
|
2019-06-28 13:53:55 +03:00
|
|
|
"time"
|
2019-06-10 01:09:38 +03:00
|
|
|
|
2019-07-29 20:57:40 +03:00
|
|
|
"github.com/google/uuid"
|
2019-06-06 18:37:40 +03:00
|
|
|
"github.com/micro/go-micro/registry"
|
2019-09-05 21:05:47 +03:00
|
|
|
"github.com/micro/go-micro/util/log"
|
2019-06-06 18:37:40 +03:00
|
|
|
)
|
|
|
|
|
2019-07-03 21:50:07 +03:00
|
|
|
// Advertisement timing parameters.
const (
	// AdvertiseEventsTick is the interval at which the router advertises route updates.
	AdvertiseEventsTick = 5 * time.Second
	// AdvertiseTableTick is the interval at which the router advertises all routes found in the routing table.
	AdvertiseTableTick = time.Minute
	// AdvertiseFlushTick is the time after which yet unconsumed advertisements are flushed, i.e. discarded.
	AdvertiseFlushTick = 15 * time.Second
	// DefaultAdvertTTL is the default advertisement TTL.
	DefaultAdvertTTL = time.Minute
	// MaxSuppressTime is the time after which a suppressed advert is deleted.
	MaxSuppressTime = 5 * time.Minute
)

// Advertisement dampening parameters.
const (
	// AdvertSuppress is the penalty threshold above which an advert is suppressed.
	AdvertSuppress = 2000.0
	// AdvertRecover is the penalty threshold below which a suppressed advert recovers.
	AdvertRecover = 750.0
	// DeletePenalty is the penalty added for a route deletion.
	DeletePenalty = 1000.0
	// UpdatePenalty is the penalty added for a route update.
	UpdatePenalty = 500.0
	// PenaltyHalfLife is the time it takes the advert penalty to decay to half its value.
	PenaltyHalfLife = 2.0
)

// PenaltyDecay is the coefficient which controls how fast the advert penalty decays.
// It is derived from PenaltyHalfLife so that the penalty halves every half-life.
var PenaltyDecay = math.Log(2) / PenaltyHalfLife
|
|
|
|
|
2019-07-24 19:16:52 +03:00
|
|
|
// router implements default router
|
2019-06-06 18:37:40 +03:00
|
|
|
type router struct {
|
2019-07-29 14:44:28 +03:00
|
|
|
sync.RWMutex
|
2019-08-28 01:08:35 +03:00
|
|
|
options Options
|
2019-07-29 20:57:40 +03:00
|
|
|
status Status
|
2019-08-12 20:18:17 +03:00
|
|
|
table *table
|
2019-07-29 20:57:40 +03:00
|
|
|
exit chan struct{}
|
|
|
|
errChan chan error
|
|
|
|
eventChan chan *Event
|
|
|
|
advertWg *sync.WaitGroup
|
|
|
|
wg *sync.WaitGroup
|
|
|
|
|
|
|
|
// advert subscribers
|
|
|
|
subscribers map[string]chan *Advert
|
2019-06-06 18:37:40 +03:00
|
|
|
}
|
|
|
|
|
2019-07-24 19:16:52 +03:00
|
|
|
// newRouter creates new router and returns it
|
2019-06-06 18:37:40 +03:00
|
|
|
func newRouter(opts ...Option) Router {
|
2019-06-13 00:30:42 +03:00
|
|
|
// get default options
|
|
|
|
options := DefaultOptions()
|
2019-06-06 18:37:40 +03:00
|
|
|
|
2019-06-12 01:59:25 +03:00
|
|
|
// apply requested options
|
2019-06-06 18:37:40 +03:00
|
|
|
for _, o := range opts {
|
2019-06-10 21:50:54 +03:00
|
|
|
o(&options)
|
|
|
|
}
|
|
|
|
|
2019-08-12 20:18:17 +03:00
|
|
|
// set initial status to Stopped
|
|
|
|
status := Status{Code: Stopped, Error: nil}
|
|
|
|
|
|
|
|
return &router{
|
2019-08-28 01:08:35 +03:00
|
|
|
options: options,
|
2019-08-12 20:18:17 +03:00
|
|
|
status: status,
|
|
|
|
table: newTable(),
|
2019-07-29 20:57:40 +03:00
|
|
|
advertWg: &sync.WaitGroup{},
|
|
|
|
wg: &sync.WaitGroup{},
|
|
|
|
subscribers: make(map[string]chan *Advert),
|
2019-06-06 18:37:40 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-07 01:29:24 +03:00
|
|
|
// Init initializes router with given options
|
2019-06-06 18:37:40 +03:00
|
|
|
func (r *router) Init(opts ...Option) error {
|
2019-08-12 20:18:17 +03:00
|
|
|
r.Lock()
|
|
|
|
defer r.Unlock()
|
|
|
|
|
2019-06-06 18:37:40 +03:00
|
|
|
for _, o := range opts {
|
2019-08-28 01:08:35 +03:00
|
|
|
o(&r.options)
|
2019-06-06 18:37:40 +03:00
|
|
|
}
|
2019-08-12 20:18:17 +03:00
|
|
|
|
2019-06-06 18:37:40 +03:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-06-07 01:29:24 +03:00
|
|
|
// Options returns router options
|
2019-06-06 18:37:40 +03:00
|
|
|
func (r *router) Options() Options {
|
2019-08-12 20:18:17 +03:00
|
|
|
r.Lock()
|
2019-08-28 01:08:35 +03:00
|
|
|
options := r.options
|
2019-08-12 20:18:17 +03:00
|
|
|
r.Unlock()
|
|
|
|
|
2019-08-28 01:08:35 +03:00
|
|
|
return options
|
2019-06-06 18:37:40 +03:00
|
|
|
}
|
|
|
|
|
2019-08-12 20:18:17 +03:00
|
|
|
// Table returns routing table
|
2019-07-29 20:57:40 +03:00
|
|
|
func (r *router) Table() Table {
|
|
|
|
return r.table
|
|
|
|
}
|
|
|
|
|
2019-07-24 19:16:52 +03:00
|
|
|
// manageRoute applies action on a given route
|
2019-07-26 01:19:05 +03:00
|
|
|
func (r *router) manageRoute(route Route, action string) error {
|
2019-07-11 14:36:39 +03:00
|
|
|
switch action {
|
|
|
|
case "create":
|
2019-07-29 20:57:40 +03:00
|
|
|
if err := r.table.Create(route); err != nil && err != ErrDuplicateRoute {
|
2019-07-11 14:36:39 +03:00
|
|
|
return fmt.Errorf("failed adding route for service %s: %s", route.Service, err)
|
|
|
|
}
|
|
|
|
case "delete":
|
2019-07-29 20:57:40 +03:00
|
|
|
if err := r.table.Delete(route); err != nil && err != ErrRouteNotFound {
|
2019-07-11 14:36:39 +03:00
|
|
|
return fmt.Errorf("failed deleting route for service %s: %s", route.Service, err)
|
|
|
|
}
|
2019-09-26 13:56:30 +03:00
|
|
|
case "update":
|
|
|
|
if err := r.table.Update(route); err != nil {
|
|
|
|
return fmt.Errorf("failed updating route for service %s: %s", route.Service, err)
|
|
|
|
}
|
2019-09-05 18:04:44 +03:00
|
|
|
case "solicit":
|
|
|
|
// nothing to do here
|
|
|
|
return nil
|
2019-07-11 14:36:39 +03:00
|
|
|
default:
|
2019-09-26 13:56:30 +03:00
|
|
|
return fmt.Errorf("failed to manage route for service %s: unknown action %s", route.Service, action)
|
2019-07-11 14:36:39 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-07-24 19:22:27 +03:00
|
|
|
// manageServiceRoutes applies action to all routes of the service.
|
2019-07-24 19:16:52 +03:00
|
|
|
// It returns error of the action fails with error.
|
2019-07-09 14:46:15 +03:00
|
|
|
func (r *router) manageServiceRoutes(service *registry.Service, action string) error {
|
2019-07-08 18:16:50 +03:00
|
|
|
// action is the routing table action
|
|
|
|
action = strings.ToLower(action)
|
2019-07-10 23:28:32 +03:00
|
|
|
|
2019-07-08 18:16:50 +03:00
|
|
|
// take route action on each service node
|
|
|
|
for _, node := range service.Nodes {
|
2019-07-26 01:19:05 +03:00
|
|
|
route := Route{
|
2019-07-09 17:45:42 +03:00
|
|
|
Service: service.Name,
|
|
|
|
Address: node.Address,
|
|
|
|
Gateway: "",
|
2019-08-28 01:08:35 +03:00
|
|
|
Network: r.options.Network,
|
|
|
|
Router: r.options.Id,
|
2019-07-26 01:19:05 +03:00
|
|
|
Link: DefaultLink,
|
|
|
|
Metric: DefaultLocalMetric,
|
2019-07-08 18:16:50 +03:00
|
|
|
}
|
2019-07-10 23:28:32 +03:00
|
|
|
|
2019-07-11 14:36:39 +03:00
|
|
|
if err := r.manageRoute(route, action); err != nil {
|
|
|
|
return err
|
2019-07-08 18:16:50 +03:00
|
|
|
}
|
|
|
|
}
|
2019-07-10 23:28:32 +03:00
|
|
|
|
2019-07-08 18:16:50 +03:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-07-24 19:22:27 +03:00
|
|
|
// manageRegistryRoutes applies action to all routes of each service found in the registry.
|
2019-07-08 23:03:54 +03:00
|
|
|
// It returns error if either the services failed to be listed or the routing table action fails.
|
2019-07-09 14:46:15 +03:00
|
|
|
func (r *router) manageRegistryRoutes(reg registry.Registry, action string) error {
|
2019-06-13 00:30:42 +03:00
|
|
|
services, err := reg.ListServices()
|
|
|
|
if err != nil {
|
2019-07-03 21:50:07 +03:00
|
|
|
return fmt.Errorf("failed listing services: %v", err)
|
2019-06-13 00:30:42 +03:00
|
|
|
}
|
|
|
|
|
2019-06-28 00:52:51 +03:00
|
|
|
// add each service node as a separate route
|
2019-06-13 00:30:42 +03:00
|
|
|
for _, service := range services {
|
2019-06-27 16:37:52 +03:00
|
|
|
// get the service to retrieve all its info
|
|
|
|
srvs, err := reg.GetService(service.Name)
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
2019-07-08 18:51:55 +03:00
|
|
|
// manage the routes for all returned services
|
2019-07-11 14:36:39 +03:00
|
|
|
for _, srv := range srvs {
|
|
|
|
if err := r.manageServiceRoutes(srv, action); err != nil {
|
2019-07-08 18:16:50 +03:00
|
|
|
return err
|
2019-06-19 20:01:48 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-06-26 18:03:19 +03:00
|
|
|
|
|
|
|
return nil
|
2019-06-19 20:01:48 +03:00
|
|
|
}
|
|
|
|
|
2019-07-17 02:06:11 +03:00
|
|
|
// watchRegistry watches registry and updates routing table based on the received events.
|
2019-07-16 21:00:25 +03:00
|
|
|
// It returns error if either the registry watcher fails with error or if the routing table update fails.
|
|
|
|
func (r *router) watchRegistry(w registry.Watcher) error {
|
2019-08-02 17:17:48 +03:00
|
|
|
exit := make(chan bool)
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
// close the exit channel when the go routine finishes
|
|
|
|
close(exit)
|
|
|
|
r.wg.Done()
|
|
|
|
}()
|
|
|
|
|
2019-08-21 23:10:42 +03:00
|
|
|
// wait in the background for the router to stop
|
|
|
|
// when the router stops, stop the watcher and exit
|
|
|
|
r.wg.Add(1)
|
2019-06-12 01:59:25 +03:00
|
|
|
go func() {
|
2019-08-02 17:17:48 +03:00
|
|
|
defer w.Stop()
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-r.exit:
|
|
|
|
return
|
|
|
|
case <-exit:
|
|
|
|
return
|
|
|
|
}
|
2019-06-12 01:59:25 +03:00
|
|
|
}()
|
|
|
|
|
|
|
|
var watchErr error
|
|
|
|
|
|
|
|
for {
|
|
|
|
res, err := w.Next()
|
|
|
|
if err != nil {
|
2019-07-01 17:43:50 +03:00
|
|
|
if err != registry.ErrWatcherStopped {
|
|
|
|
watchErr = err
|
|
|
|
}
|
2019-06-12 01:59:25 +03:00
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2019-07-09 14:46:15 +03:00
|
|
|
if err := r.manageServiceRoutes(res.Service, res.Action); err != nil {
|
2019-07-08 18:16:50 +03:00
|
|
|
return err
|
2019-06-13 00:30:42 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return watchErr
|
|
|
|
}
|
|
|
|
|
2019-06-28 00:52:51 +03:00
|
|
|
// watchTable watches routing table entries and either adds or deletes locally registered service to/from network registry
|
|
|
|
// It returns error if the locally registered services either fails to be added/deleted to/from network registry.
|
2019-07-26 01:19:05 +03:00
|
|
|
func (r *router) watchTable(w Watcher) error {
|
2019-08-02 17:17:48 +03:00
|
|
|
exit := make(chan bool)
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
// close the exit channel when the go routine finishes
|
|
|
|
close(exit)
|
|
|
|
r.wg.Done()
|
|
|
|
}()
|
|
|
|
|
2019-08-21 23:10:42 +03:00
|
|
|
// wait in the background for the router to stop
|
|
|
|
// when the router stops, stop the watcher and exit
|
|
|
|
r.wg.Add(1)
|
2019-06-28 00:52:51 +03:00
|
|
|
go func() {
|
2019-08-02 17:17:48 +03:00
|
|
|
defer w.Stop()
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-r.exit:
|
|
|
|
return
|
|
|
|
case <-exit:
|
|
|
|
return
|
|
|
|
}
|
2019-06-28 00:52:51 +03:00
|
|
|
}()
|
|
|
|
|
|
|
|
var watchErr error
|
|
|
|
|
|
|
|
for {
|
|
|
|
event, err := w.Next()
|
|
|
|
if err != nil {
|
2019-07-26 01:19:05 +03:00
|
|
|
if err != ErrWatcherStopped {
|
2019-07-01 17:43:50 +03:00
|
|
|
watchErr = err
|
|
|
|
}
|
2019-06-28 00:52:51 +03:00
|
|
|
break
|
|
|
|
}
|
2019-07-08 18:51:55 +03:00
|
|
|
|
2019-07-05 21:15:32 +03:00
|
|
|
select {
|
|
|
|
case <-r.exit:
|
|
|
|
close(r.eventChan)
|
|
|
|
return nil
|
|
|
|
case r.eventChan <- event:
|
2019-06-28 13:53:55 +03:00
|
|
|
}
|
2019-07-05 21:15:32 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// close event channel on error
|
|
|
|
close(r.eventChan)
|
|
|
|
|
|
|
|
return watchErr
|
|
|
|
}
|
|
|
|
|
2019-07-24 19:16:52 +03:00
|
|
|
// publishAdvert publishes router advert to advert channel
|
|
|
|
// NOTE: this might cease to be a dedicated method in the future
|
2019-07-26 01:19:05 +03:00
|
|
|
func (r *router) publishAdvert(advType AdvertType, events []*Event) {
|
2019-07-09 14:46:15 +03:00
|
|
|
defer r.advertWg.Done()
|
|
|
|
|
|
|
|
a := &Advert{
|
2019-08-28 01:08:35 +03:00
|
|
|
Id: r.options.Id,
|
2019-07-09 14:46:15 +03:00
|
|
|
Type: advType,
|
2019-07-11 14:36:39 +03:00
|
|
|
TTL: DefaultAdvertTTL,
|
2019-07-09 14:46:15 +03:00
|
|
|
Timestamp: time.Now(),
|
|
|
|
Events: events,
|
|
|
|
}
|
|
|
|
|
2019-09-05 21:05:47 +03:00
|
|
|
log.Debugf("Router publishing advert; %+v", a)
|
2019-07-29 20:57:40 +03:00
|
|
|
r.RLock()
|
|
|
|
for _, sub := range r.subscribers {
|
|
|
|
// check the exit chan first
|
|
|
|
select {
|
|
|
|
case <-r.exit:
|
|
|
|
r.RUnlock()
|
|
|
|
return
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
|
|
|
// now send the message
|
|
|
|
select {
|
|
|
|
case sub <- a:
|
|
|
|
default:
|
|
|
|
}
|
2019-07-09 14:46:15 +03:00
|
|
|
}
|
2019-07-29 20:57:40 +03:00
|
|
|
r.RUnlock()
|
2019-07-11 14:36:39 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// advertiseTable advertises the whole routing table to the network
|
|
|
|
func (r *router) advertiseTable() error {
|
|
|
|
// create table advertisement ticker
|
|
|
|
ticker := time.NewTicker(AdvertiseTableTick)
|
2019-08-02 16:44:11 +03:00
|
|
|
defer ticker.Stop()
|
2019-07-11 14:36:39 +03:00
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ticker.C:
|
2019-10-09 18:03:06 +03:00
|
|
|
// do full table flush
|
|
|
|
events, err := r.flushRouteEvents(Update)
|
2019-07-11 14:36:39 +03:00
|
|
|
if err != nil {
|
2019-10-09 18:03:06 +03:00
|
|
|
return fmt.Errorf("failed flushing routes: %s", err)
|
2019-07-11 14:36:39 +03:00
|
|
|
}
|
|
|
|
|
2019-10-09 18:03:06 +03:00
|
|
|
// advertise routes to subscribers
|
2019-07-11 14:36:39 +03:00
|
|
|
if len(events) > 0 {
|
2019-09-25 22:29:25 +03:00
|
|
|
log.Debugf("Router flushing table with %d events: %s", len(events), r.options.Id)
|
2019-07-17 15:02:47 +03:00
|
|
|
r.advertWg.Add(1)
|
2019-07-26 01:19:05 +03:00
|
|
|
go r.publishAdvert(RouteUpdate, events)
|
2019-07-11 14:36:39 +03:00
|
|
|
}
|
|
|
|
case <-r.exit:
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
2019-07-09 14:46:15 +03:00
|
|
|
}
|
|
|
|
|
2019-09-26 19:51:57 +03:00
|
|
|
// routeAdvert contains a route event to be advertised
|
2019-07-16 21:00:25 +03:00
|
|
|
type routeAdvert struct {
|
2019-09-26 19:51:57 +03:00
|
|
|
// event received from routing table
|
|
|
|
event *Event
|
2019-07-16 21:00:25 +03:00
|
|
|
// lastUpdate records the time of the last advert update
|
|
|
|
lastUpdate time.Time
|
|
|
|
// penalty is current advert penalty
|
2019-07-08 23:03:54 +03:00
|
|
|
penalty float64
|
2019-07-16 21:00:25 +03:00
|
|
|
// isSuppressed flags the advert suppression
|
2019-07-08 23:03:54 +03:00
|
|
|
isSuppressed bool
|
2019-07-16 21:00:25 +03:00
|
|
|
// suppressTime records the time interval the advert has been suppressed for
|
|
|
|
suppressTime time.Time
|
2019-07-08 23:03:54 +03:00
|
|
|
}
|
|
|
|
|
2019-07-24 19:16:52 +03:00
|
|
|
// advertiseEvents advertises routing table events
|
2019-07-05 21:15:32 +03:00
|
|
|
// It suppresses unhealthy flapping events and advertises healthy events upstream.
|
2019-07-24 19:16:52 +03:00
|
|
|
func (r *router) advertiseEvents() error {
|
2019-07-05 21:15:32 +03:00
|
|
|
// ticker to periodically scan event for advertising
|
2019-07-11 14:36:39 +03:00
|
|
|
ticker := time.NewTicker(AdvertiseEventsTick)
|
2019-08-02 16:44:11 +03:00
|
|
|
defer ticker.Stop()
|
|
|
|
|
2019-07-16 21:00:25 +03:00
|
|
|
// advertMap is a map of advert events
|
|
|
|
advertMap := make(map[uint64]*routeAdvert)
|
2019-07-05 21:15:32 +03:00
|
|
|
|
2019-07-24 19:16:52 +03:00
|
|
|
// routing table watcher
|
|
|
|
tableWatcher, err := r.Watch()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed creating routing table watcher: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
r.wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer r.wg.Done()
|
|
|
|
select {
|
|
|
|
case r.errChan <- r.watchTable(tableWatcher):
|
|
|
|
case <-r.exit:
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2019-07-05 21:15:32 +03:00
|
|
|
for {
|
2019-06-28 13:53:55 +03:00
|
|
|
select {
|
2019-07-05 21:15:32 +03:00
|
|
|
case <-ticker.C:
|
2019-07-26 01:19:05 +03:00
|
|
|
var events []*Event
|
2019-07-08 23:03:54 +03:00
|
|
|
// collect all events which are not flapping
|
2019-07-16 21:00:25 +03:00
|
|
|
for key, advert := range advertMap {
|
|
|
|
// decay the event penalty
|
|
|
|
delta := time.Since(advert.lastUpdate).Seconds()
|
|
|
|
advert.penalty = advert.penalty * math.Exp(-delta*PenaltyDecay)
|
|
|
|
|
|
|
|
// suppress/recover the event based on its penalty level
|
|
|
|
switch {
|
|
|
|
case advert.penalty > AdvertSuppress && !advert.isSuppressed:
|
2019-09-26 19:51:57 +03:00
|
|
|
log.Debugf("Router supressing advert %d for route %s", key, advert.event.Route.Address)
|
2019-07-16 21:00:25 +03:00
|
|
|
advert.isSuppressed = true
|
|
|
|
advert.suppressTime = time.Now()
|
|
|
|
case advert.penalty < AdvertRecover && advert.isSuppressed:
|
2019-09-26 19:51:57 +03:00
|
|
|
log.Debugf("Router recovering advert %d for route %s", key, advert.event.Route.Address)
|
2019-07-16 21:00:25 +03:00
|
|
|
advert.isSuppressed = false
|
|
|
|
}
|
|
|
|
|
|
|
|
// max suppression time threshold has been reached, delete the advert
|
|
|
|
if advert.isSuppressed {
|
|
|
|
if time.Since(advert.suppressTime) > MaxSuppressTime {
|
|
|
|
delete(advertMap, key)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if !advert.isSuppressed {
|
2019-09-26 19:51:57 +03:00
|
|
|
e := new(Event)
|
|
|
|
*e = *(advert.event)
|
|
|
|
events = append(events, e)
|
|
|
|
// delete the advert from the advertMap
|
|
|
|
delete(advertMap, key)
|
2019-07-05 21:15:32 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-07-11 14:36:39 +03:00
|
|
|
// advertise all Update events to subscribers
|
2019-07-05 21:15:32 +03:00
|
|
|
if len(events) > 0 {
|
2019-07-09 14:46:15 +03:00
|
|
|
r.advertWg.Add(1)
|
2019-09-26 13:56:30 +03:00
|
|
|
log.Debugf("Router publishing %d events", len(events))
|
2019-07-26 01:19:05 +03:00
|
|
|
go r.publishAdvert(RouteUpdate, events)
|
2019-07-05 21:15:32 +03:00
|
|
|
}
|
|
|
|
case e := <-r.eventChan:
|
2019-07-08 23:03:54 +03:00
|
|
|
// if event is nil, continue
|
2019-07-05 21:15:32 +03:00
|
|
|
if e == nil {
|
|
|
|
continue
|
|
|
|
}
|
2019-09-26 13:56:30 +03:00
|
|
|
log.Debugf("Router processing table event %s for service %s", e.Type, e.Route.Address)
|
2019-07-05 21:15:32 +03:00
|
|
|
// determine the event penalty
|
|
|
|
var penalty float64
|
|
|
|
switch e.Type {
|
2019-07-26 01:19:05 +03:00
|
|
|
case Update:
|
2019-07-08 23:03:54 +03:00
|
|
|
penalty = UpdatePenalty
|
2019-07-26 01:19:05 +03:00
|
|
|
case Delete:
|
2019-07-16 21:00:25 +03:00
|
|
|
penalty = DeletePenalty
|
2019-07-05 21:15:32 +03:00
|
|
|
}
|
2019-07-16 21:00:25 +03:00
|
|
|
|
|
|
|
// check if we have already registered the route
|
2019-07-05 21:15:32 +03:00
|
|
|
hash := e.Route.Hash()
|
2019-07-16 21:00:25 +03:00
|
|
|
advert, ok := advertMap[hash]
|
2019-07-05 21:15:32 +03:00
|
|
|
if !ok {
|
2019-07-16 21:00:25 +03:00
|
|
|
advert = &routeAdvert{
|
2019-09-26 19:51:57 +03:00
|
|
|
event: e,
|
2019-07-16 21:00:25 +03:00
|
|
|
penalty: penalty,
|
|
|
|
lastUpdate: time.Now(),
|
2019-07-05 21:15:32 +03:00
|
|
|
}
|
2019-07-16 21:00:25 +03:00
|
|
|
advertMap[hash] = advert
|
2019-07-05 21:15:32 +03:00
|
|
|
continue
|
|
|
|
}
|
2019-07-16 21:00:25 +03:00
|
|
|
|
2019-09-26 19:51:57 +03:00
|
|
|
// override the route event only if the last event was different
|
|
|
|
if advert.event.Type != e.Type {
|
|
|
|
advert.event = e
|
2019-07-05 21:15:32 +03:00
|
|
|
}
|
2019-07-16 21:00:25 +03:00
|
|
|
|
2019-09-26 19:51:57 +03:00
|
|
|
// update event penalty and timestamp
|
2019-07-16 21:00:25 +03:00
|
|
|
advert.lastUpdate = time.Now()
|
|
|
|
advert.penalty += penalty
|
2019-09-26 19:51:57 +03:00
|
|
|
log.Debugf("Router advert %d for route %s event penalty: %f", hash, advert.event.Route.Address, advert.penalty)
|
2019-06-28 13:53:55 +03:00
|
|
|
case <-r.exit:
|
2019-07-09 14:46:15 +03:00
|
|
|
// first wait for the advertiser to finish
|
|
|
|
r.advertWg.Wait()
|
|
|
|
return nil
|
2019-06-28 00:52:51 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-21 23:10:42 +03:00
|
|
|
// close closes exit channels
|
|
|
|
func (r *router) close() {
|
|
|
|
// notify all goroutines to finish
|
|
|
|
close(r.exit)
|
|
|
|
|
|
|
|
// drain the advertise channel only if advertising
|
|
|
|
if r.status.Code == Advertising {
|
|
|
|
// drain the event channel
|
|
|
|
for range r.eventChan {
|
|
|
|
}
|
|
|
|
|
|
|
|
// close advert subscribers
|
|
|
|
for id, sub := range r.subscribers {
|
|
|
|
// close the channel
|
|
|
|
close(sub)
|
|
|
|
|
|
|
|
// delete the subscriber
|
|
|
|
delete(r.subscribers, id)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// mark the router as Stopped and set its Error to nil
|
|
|
|
r.status = Status{Code: Stopped, Error: nil}
|
|
|
|
}
|
|
|
|
|
2019-07-08 18:51:55 +03:00
|
|
|
// watchErrors watches router errors and takes appropriate actions
|
2019-07-24 19:16:52 +03:00
|
|
|
func (r *router) watchErrors() {
|
2019-06-29 02:46:22 +03:00
|
|
|
var err error
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-r.exit:
|
2019-08-21 23:10:42 +03:00
|
|
|
return
|
2019-07-24 19:16:52 +03:00
|
|
|
case err = <-r.errChan:
|
2019-06-29 02:46:22 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
r.Lock()
|
|
|
|
defer r.Unlock()
|
2019-08-12 20:18:17 +03:00
|
|
|
// if the router is not stopped, stop it
|
2019-07-24 19:16:52 +03:00
|
|
|
if r.status.Code != Stopped {
|
2019-08-21 23:10:42 +03:00
|
|
|
// close all the channels
|
|
|
|
r.close()
|
|
|
|
// set the status error
|
|
|
|
if err != nil {
|
|
|
|
r.status.Error = err
|
2019-07-05 21:15:32 +03:00
|
|
|
}
|
2019-07-24 19:16:52 +03:00
|
|
|
}
|
2019-06-29 02:46:22 +03:00
|
|
|
}
|
|
|
|
|
2019-08-12 20:18:17 +03:00
|
|
|
// Start starts the router
|
|
|
|
func (r *router) Start() error {
|
2019-06-29 02:46:22 +03:00
|
|
|
r.Lock()
|
|
|
|
defer r.Unlock()
|
|
|
|
|
2019-08-21 20:58:56 +03:00
|
|
|
// only start if we're stopped
|
|
|
|
if r.status.Code != Stopped {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-08-12 20:18:17 +03:00
|
|
|
// add all local service routes into the routing table
|
2019-08-28 01:08:35 +03:00
|
|
|
if err := r.manageRegistryRoutes(r.options.Registry, "create"); err != nil {
|
2019-08-12 20:18:17 +03:00
|
|
|
e := fmt.Errorf("failed adding registry routes: %s", err)
|
|
|
|
r.status = Status{Code: Error, Error: e}
|
|
|
|
return e
|
|
|
|
}
|
2019-07-09 17:01:52 +03:00
|
|
|
|
2019-08-12 20:18:17 +03:00
|
|
|
// add default gateway into routing table
|
2019-08-28 01:08:35 +03:00
|
|
|
if r.options.Gateway != "" {
|
2019-08-12 20:18:17 +03:00
|
|
|
// note, the only non-default value is the gateway
|
|
|
|
route := Route{
|
|
|
|
Service: "*",
|
|
|
|
Address: "*",
|
2019-08-28 01:08:35 +03:00
|
|
|
Gateway: r.options.Gateway,
|
2019-08-12 20:18:17 +03:00
|
|
|
Network: "*",
|
2019-08-28 01:08:35 +03:00
|
|
|
Router: r.options.Id,
|
|
|
|
Link: DefaultLink,
|
2019-08-12 20:18:17 +03:00
|
|
|
Metric: DefaultLocalMetric,
|
2019-06-29 02:46:22 +03:00
|
|
|
}
|
2019-08-12 20:18:17 +03:00
|
|
|
if err := r.table.Create(route); err != nil {
|
|
|
|
e := fmt.Errorf("failed adding default gateway route: %s", err)
|
|
|
|
r.status = Status{Code: Error, Error: e}
|
|
|
|
return e
|
2019-06-29 02:46:22 +03:00
|
|
|
}
|
2019-08-12 20:18:17 +03:00
|
|
|
}
|
2019-06-29 02:46:22 +03:00
|
|
|
|
2019-08-12 20:18:17 +03:00
|
|
|
// create error and exit channels
|
|
|
|
r.errChan = make(chan error, 1)
|
|
|
|
r.exit = make(chan struct{})
|
2019-06-29 02:46:22 +03:00
|
|
|
|
2019-08-12 20:18:17 +03:00
|
|
|
// registry watcher
|
2019-08-28 01:08:35 +03:00
|
|
|
regWatcher, err := r.options.Registry.Watch()
|
2019-08-12 20:18:17 +03:00
|
|
|
if err != nil {
|
|
|
|
e := fmt.Errorf("failed creating registry watcher: %v", err)
|
|
|
|
r.status = Status{Code: Error, Error: e}
|
|
|
|
return e
|
|
|
|
}
|
2019-07-05 21:15:32 +03:00
|
|
|
|
2019-08-12 20:18:17 +03:00
|
|
|
r.wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer r.wg.Done()
|
|
|
|
select {
|
|
|
|
case r.errChan <- r.watchRegistry(regWatcher):
|
|
|
|
case <-r.exit:
|
|
|
|
}
|
|
|
|
}()
|
2019-07-24 19:16:52 +03:00
|
|
|
|
2019-08-12 20:18:17 +03:00
|
|
|
// watch for errors and cleanup
|
|
|
|
r.wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer r.wg.Done()
|
|
|
|
r.watchErrors()
|
|
|
|
}()
|
2019-07-24 19:16:52 +03:00
|
|
|
|
2019-08-12 20:18:17 +03:00
|
|
|
// mark router as Running
|
|
|
|
r.status = Status{Code: Running, Error: nil}
|
|
|
|
|
|
|
|
return nil
|
2019-07-24 19:16:52 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// Advertise stars advertising the routes to the network and returns the advertisements channel to consume from.
|
|
|
|
// If the router is already advertising it returns the channel to consume from.
|
|
|
|
// It returns error if either the router is not running or if the routing table fails to list the routes to advertise.
|
|
|
|
func (r *router) Advertise() (<-chan *Advert, error) {
|
|
|
|
r.Lock()
|
|
|
|
defer r.Unlock()
|
|
|
|
|
|
|
|
switch r.status.Code {
|
|
|
|
case Advertising:
|
2019-09-05 21:05:47 +03:00
|
|
|
advertChan := make(chan *Advert, 128)
|
2019-07-29 20:57:40 +03:00
|
|
|
r.subscribers[uuid.New().String()] = advertChan
|
|
|
|
return advertChan, nil
|
2019-07-24 19:16:52 +03:00
|
|
|
case Running:
|
2019-09-05 15:23:33 +03:00
|
|
|
// list all the routes and pack them into even slice to advertise
|
2019-09-05 18:04:44 +03:00
|
|
|
events, err := r.flushRouteEvents(Create)
|
2019-07-24 19:16:52 +03:00
|
|
|
if err != nil {
|
2019-09-05 18:04:44 +03:00
|
|
|
return nil, fmt.Errorf("failed to flush routes: %s", err)
|
2019-07-24 19:16:52 +03:00
|
|
|
}
|
|
|
|
|
2019-07-29 20:57:40 +03:00
|
|
|
// create event channels
|
2019-07-26 01:19:05 +03:00
|
|
|
r.eventChan = make(chan *Event)
|
2019-07-24 19:16:52 +03:00
|
|
|
|
|
|
|
// advertise your presence
|
|
|
|
r.advertWg.Add(1)
|
|
|
|
go r.publishAdvert(Announce, events)
|
|
|
|
|
2019-07-05 21:15:32 +03:00
|
|
|
r.wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer r.wg.Done()
|
2019-07-24 19:16:52 +03:00
|
|
|
select {
|
|
|
|
case r.errChan <- r.advertiseEvents():
|
|
|
|
case <-r.exit:
|
|
|
|
}
|
2019-06-29 02:46:22 +03:00
|
|
|
}()
|
|
|
|
|
2019-07-11 14:36:39 +03:00
|
|
|
r.advertWg.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer r.advertWg.Done()
|
|
|
|
// advertise the whole routing table
|
2019-07-24 19:16:52 +03:00
|
|
|
select {
|
|
|
|
case r.errChan <- r.advertiseTable():
|
|
|
|
case <-r.exit:
|
|
|
|
}
|
2019-07-11 14:36:39 +03:00
|
|
|
}()
|
2019-07-01 17:43:50 +03:00
|
|
|
|
2019-07-24 19:16:52 +03:00
|
|
|
// mark router as Running and set its Error to nil
|
|
|
|
r.status = Status{Code: Advertising, Error: nil}
|
2019-07-08 23:03:54 +03:00
|
|
|
|
2019-07-29 20:57:40 +03:00
|
|
|
// create advert channel
|
2019-09-05 21:05:47 +03:00
|
|
|
advertChan := make(chan *Advert, 128)
|
2019-07-29 20:57:40 +03:00
|
|
|
r.subscribers[uuid.New().String()] = advertChan
|
|
|
|
|
|
|
|
return advertChan, nil
|
2019-07-24 19:16:52 +03:00
|
|
|
case Stopped:
|
|
|
|
return nil, fmt.Errorf("not running")
|
2019-06-29 02:46:22 +03:00
|
|
|
}
|
|
|
|
|
2019-07-24 19:16:52 +03:00
|
|
|
return nil, fmt.Errorf("error: %s", r.status.Error)
|
2019-06-29 02:46:22 +03:00
|
|
|
}
|
|
|
|
|
2019-07-10 09:45:27 +03:00
|
|
|
// Process updates the routing table using the advertised values
|
|
|
|
func (r *router) Process(a *Advert) error {
|
2019-07-03 21:50:07 +03:00
|
|
|
// NOTE: event sorting might not be necessary
|
|
|
|
// copy update events intp new slices
|
2019-07-26 01:19:05 +03:00
|
|
|
events := make([]*Event, len(a.Events))
|
2019-07-04 04:06:59 +03:00
|
|
|
copy(events, a.Events)
|
2019-07-03 21:50:07 +03:00
|
|
|
// sort events by timestamp
|
|
|
|
sort.Slice(events, func(i, j int) bool {
|
|
|
|
return events[i].Timestamp.Before(events[j].Timestamp)
|
|
|
|
})
|
|
|
|
|
2019-09-26 14:07:26 +03:00
|
|
|
log.Debugf("Router %s processing advert from: %s", r.options.Id, a.Id)
|
|
|
|
|
2019-07-03 21:50:07 +03:00
|
|
|
for _, event := range events {
|
2019-08-28 01:08:35 +03:00
|
|
|
// skip if the router is the origin of this route
|
|
|
|
if event.Route.Router == r.options.Id {
|
2019-09-25 22:29:25 +03:00
|
|
|
log.Debugf("Router skipping processing its own route: %s", r.options.Id)
|
2019-08-28 01:08:35 +03:00
|
|
|
continue
|
|
|
|
}
|
2019-07-09 17:45:42 +03:00
|
|
|
// create a copy of the route
|
|
|
|
route := event.Route
|
2019-07-11 14:36:39 +03:00
|
|
|
action := event.Type
|
2019-09-26 14:07:26 +03:00
|
|
|
log.Debugf("Router %s applying %s from router %s for address: %s", r.options.Id, action, route.Router, route.Address)
|
2019-07-11 14:36:39 +03:00
|
|
|
if err := r.manageRoute(route, fmt.Sprintf("%s", action)); err != nil {
|
|
|
|
return fmt.Errorf("failed applying action %s to routing table: %s", action, err)
|
2019-07-03 21:50:07 +03:00
|
|
|
}
|
2019-06-29 02:46:22 +03:00
|
|
|
}
|
|
|
|
|
2019-07-03 21:50:07 +03:00
|
|
|
return nil
|
2019-06-29 02:46:22 +03:00
|
|
|
}
|
|
|
|
|
2019-09-05 18:04:44 +03:00
|
|
|
// flushRouteEvents returns a slice of events, one per each route in the routing table
|
|
|
|
func (r *router) flushRouteEvents(evType EventType) ([]*Event, error) {
|
2019-09-05 15:23:33 +03:00
|
|
|
// list all routes
|
|
|
|
routes, err := r.table.List()
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed listing routes: %s", err)
|
|
|
|
}
|
|
|
|
|
2019-10-09 21:08:24 +03:00
|
|
|
if r.options.Advertise == AdvertiseAll {
|
2019-10-09 19:23:02 +03:00
|
|
|
// build a list of events to advertise
|
|
|
|
events := make([]*Event, len(routes))
|
|
|
|
for i, route := range routes {
|
|
|
|
event := &Event{
|
|
|
|
Type: evType,
|
|
|
|
Timestamp: time.Now(),
|
|
|
|
Route: route,
|
|
|
|
}
|
|
|
|
events[i] = event
|
|
|
|
}
|
|
|
|
return events, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// routeMap stores optimal routes per service
|
2019-10-09 21:08:24 +03:00
|
|
|
bestRoutes := make(map[string]Route)
|
2019-10-09 19:23:02 +03:00
|
|
|
|
|
|
|
// go through all routes found in the routing table and collapse them to optimal routes
|
|
|
|
for _, route := range routes {
|
2019-10-09 21:08:24 +03:00
|
|
|
routeKey := route.Service + "@" + route.Network
|
|
|
|
optimal, ok := bestRoutes[routeKey]
|
2019-10-09 19:23:02 +03:00
|
|
|
if !ok {
|
2019-10-09 21:08:24 +03:00
|
|
|
bestRoutes[routeKey] = route
|
2019-10-09 19:23:02 +03:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
// if the current optimal route metric is higher than routing table route, replace it
|
|
|
|
if optimal.Metric > route.Metric {
|
2019-10-09 21:08:24 +03:00
|
|
|
bestRoutes[routeKey] = route
|
2019-10-09 19:23:02 +03:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
// if the metrics are the same, prefer advertising your own route
|
|
|
|
if optimal.Metric == route.Metric {
|
|
|
|
if route.Router == r.options.Id {
|
2019-10-09 21:08:24 +03:00
|
|
|
bestRoutes[routeKey] = route
|
2019-10-09 19:23:02 +03:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-10-09 18:03:06 +03:00
|
|
|
|
2019-10-09 21:08:24 +03:00
|
|
|
log.Debugf("Router advertising %d best routes out of %d", len(bestRoutes), len(routes))
|
|
|
|
|
2019-09-05 15:23:33 +03:00
|
|
|
// build a list of events to advertise
|
2019-10-09 21:08:24 +03:00
|
|
|
events := make([]*Event, len(bestRoutes))
|
2019-10-09 19:23:02 +03:00
|
|
|
i := 0
|
2019-10-09 21:08:24 +03:00
|
|
|
for _, route := range bestRoutes {
|
2019-09-05 15:23:33 +03:00
|
|
|
event := &Event{
|
2019-09-05 18:04:44 +03:00
|
|
|
Type: evType,
|
2019-09-05 15:23:33 +03:00
|
|
|
Timestamp: time.Now(),
|
|
|
|
Route: route,
|
|
|
|
}
|
|
|
|
events[i] = event
|
2019-10-09 19:23:02 +03:00
|
|
|
i++
|
2019-09-05 15:23:33 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return events, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Solicit advertises all of its routes to the network
|
|
|
|
// It returns error if the router fails to list the routes
|
|
|
|
func (r *router) Solicit() error {
|
2019-09-05 18:04:44 +03:00
|
|
|
events, err := r.flushRouteEvents(Update)
|
2019-09-05 15:23:33 +03:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed solicit routes: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// advertise the routes
|
|
|
|
r.advertWg.Add(1)
|
|
|
|
go r.publishAdvert(RouteUpdate, events)
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-08-21 23:10:42 +03:00
|
|
|
// Lookup routes in the routing table
|
2019-10-09 19:13:52 +03:00
|
|
|
func (r *router) Lookup(q ...QueryOption) ([]Route, error) {
|
|
|
|
return r.table.Query(q...)
|
2019-07-29 20:57:40 +03:00
|
|
|
}
|
|
|
|
|
2019-08-21 23:10:42 +03:00
|
|
|
// Watch routes
|
2019-07-29 20:57:40 +03:00
|
|
|
func (r *router) Watch(opts ...WatchOption) (Watcher, error) {
|
|
|
|
return r.table.Watch(opts...)
|
|
|
|
}
|
|
|
|
|
2019-06-28 20:35:53 +03:00
|
|
|
// Status returns router status
|
|
|
|
func (r *router) Status() Status {
|
|
|
|
r.RLock()
|
|
|
|
defer r.RUnlock()
|
|
|
|
|
2019-06-29 02:46:22 +03:00
|
|
|
// make a copy of the status
|
|
|
|
status := r.status
|
|
|
|
|
|
|
|
return status
|
2019-06-28 20:35:53 +03:00
|
|
|
}
|
|
|
|
|
2019-06-12 01:59:25 +03:00
|
|
|
// Stop stops the router
|
|
|
|
func (r *router) Stop() error {
|
2019-07-24 19:16:52 +03:00
|
|
|
r.Lock()
|
2019-08-21 23:10:42 +03:00
|
|
|
defer r.Unlock()
|
2019-07-29 20:57:40 +03:00
|
|
|
|
2019-08-21 23:10:42 +03:00
|
|
|
switch r.status.Code {
|
|
|
|
case Stopped, Error:
|
|
|
|
return r.status.Error
|
|
|
|
case Running, Advertising:
|
|
|
|
// close all the channels
|
|
|
|
r.close()
|
2019-06-29 02:46:22 +03:00
|
|
|
}
|
2019-07-01 17:43:50 +03:00
|
|
|
|
|
|
|
// wait for all goroutines to finish
|
|
|
|
r.wg.Wait()
|
2019-06-19 20:01:48 +03:00
|
|
|
|
2019-06-12 01:59:25 +03:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-06-07 01:29:24 +03:00
|
|
|
// String prints debugging information about router
|
2019-07-10 09:45:27 +03:00
|
|
|
func (r *router) String() string {
|
2019-08-21 23:10:42 +03:00
|
|
|
return "memory"
|
2019-06-06 18:37:40 +03:00
|
|
|
}
|