Merge pull request #591 from milosgajdos83/advert-damp

[WIP] Fixes advert route event dampening behaviour
This commit is contained in:
Asim Aslam 2019-07-17 08:12:35 -07:00 committed by GitHub
commit 96cf14ed53
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 106 additions and 99 deletions

View File

@ -18,17 +18,24 @@ const (
// AdvertiseTableTick is time interval in which router advertises all routes found in routing table // AdvertiseTableTick is time interval in which router advertises all routes found in routing table
AdvertiseTableTick = 1 * time.Minute AdvertiseTableTick = 1 * time.Minute
// AdvertSuppress is advert suppression threshold // AdvertSuppress is advert suppression threshold
AdvertSuppress = 2000 AdvertSuppress = 2000.0
// AdvertRecover is advert recovery threshold // AdvertRecover is advert recovery threshold
AdvertRecover = 750 AdvertRecover = 750.0
// DefaultAdvertTTL is default advertisement TTL // DefaultAdvertTTL is default advertisement TTL
DefaultAdvertTTL = 1 * time.Minute DefaultAdvertTTL = 1 * time.Minute
// PenaltyDecay is the penalty decay // DeletePenalty penalises route deletion
PenaltyDecay = 1.15 DeletePenalty = 1000.0
// Delete penalises route addition and deletion
Delete = 1000
// UpdatePenalty penalises route updates // UpdatePenalty penalises route updates
UpdatePenalty = 500 UpdatePenalty = 500.0
// PenaltyHalfLife is the time (in seconds) it takes the advert penalty to decay to half its value
PenaltyHalfLife = 2.0
// MaxSuppressTime defines time after which the suppressed advert is deleted
MaxSuppressTime = 5 * time.Minute
)
var (
// PenaltyDecay is a coefficient which controls the speed at which the advert penalty decays
PenaltyDecay = math.Log(2) / PenaltyHalfLife
) )
// router provides default router implementation // router provides default router implementation
@ -153,9 +160,9 @@ func (r *router) manageRegistryRoutes(reg registry.Registry, action string) erro
return nil return nil
} }
// watchServices watches services in given registry and updates the routing table accordingly. // watchRegistry watches registry and updates routing table based on the received events.
// It returns error if the service registry watcher stops or if the routing table can't be updated. // It returns error if either the registry watcher fails with error or if the routing table update fails.
func (r *router) watchServices(w registry.Watcher) error { func (r *router) watchRegistry(w registry.Watcher) error {
// wait in the background for the router to stop // wait in the background for the router to stop
// when the router stops, stop the watcher and exit // when the router stops, stop the watcher and exit
r.wg.Add(1) r.wg.Add(1)
@ -266,43 +273,26 @@ func (r *router) advertiseTable() error {
// advertise all routes as Update events to subscribers // advertise all routes as Update events to subscribers
if len(events) > 0 { if len(events) > 0 {
go func() {
r.advertWg.Add(1) r.advertWg.Add(1)
r.advertiseEvents(Update, events) go r.advertiseEvents(Update, events)
}()
} }
case <-r.exit: case <-r.exit:
return nil return nil
} }
} }
return nil
} }
// isFlapping detects if the event is flapping based on the current and previous event status. // routeAdvert contains a list of route events to be advertised
func isFlapping(curr, prev *table.Event) bool { type routeAdvert struct {
if curr.Type == table.Update && prev.Type == table.Update { events []*table.Event
return true // lastUpdate records the time of the last advert update
} lastUpdate time.Time
// penalty is current advert penalty
if curr.Type == table.Create && prev.Type == table.Delete || curr.Type == table.Delete && prev.Type == table.Create {
return true
}
return false
}
// advertEvent is a table event enriched with advertisement data
type advertEvent struct {
*table.Event
// timestamp marks the time the event has been received
timestamp time.Time
// penalty is current event penalty
penalty float64 penalty float64
// isSuppressed flags if the event should be considered for flap detection // isSuppressed flags the advert suppression
isSuppressed bool isSuppressed bool
// isFlapping marks the event as flapping event // suppressTime records the time at which the advert was suppressed
isFlapping bool suppressTime time.Time
} }
// processEvents processes routing table events. // processEvents processes routing table events.
@ -310,22 +300,44 @@ type advertEvent struct {
func (r *router) processEvents() error { func (r *router) processEvents() error {
// ticker to periodically scan event for advertising // ticker to periodically scan event for advertising
ticker := time.NewTicker(AdvertiseEventsTick) ticker := time.NewTicker(AdvertiseEventsTick)
// eventMap is a map of advert events // advertMap is a map of advert events
eventMap := make(map[uint64]*advertEvent) advertMap := make(map[uint64]*routeAdvert)
for { for {
select { select {
case <-ticker.C: case <-ticker.C:
var events []*table.Event var events []*table.Event
// collect all events which are not flapping // collect all events which are not flapping
// TODO: decay the events and update suppression for key, advert := range advertMap {
for key, event := range eventMap { // decay the event penalty
if !event.isFlapping && !event.isSuppressed { delta := time.Since(advert.lastUpdate).Seconds()
advert.penalty = advert.penalty * math.Exp(-delta*PenaltyDecay)
// suppress/recover the event based on its penalty level
switch {
case advert.penalty > AdvertSuppress && !advert.isSuppressed:
advert.isSuppressed = true
advert.suppressTime = time.Now()
case advert.penalty < AdvertRecover && advert.isSuppressed:
advert.isSuppressed = false
}
// max suppression time threshold has been reached, delete the advert
if advert.isSuppressed {
if time.Since(advert.suppressTime) > MaxSuppressTime {
delete(advertMap, key)
continue
}
}
if !advert.isSuppressed {
for _, event := range advert.events {
e := new(table.Event) e := new(table.Event)
*e = *event.Event *e = *event
events = append(events, e) events = append(events, e)
// this deletes the advertised event from the map // delete the advert from the advertMap
delete(eventMap, key) delete(advertMap, key)
}
} }
} }
@ -335,8 +347,6 @@ func (r *router) processEvents() error {
go r.advertiseEvents(Update, events) go r.advertiseEvents(Update, events)
} }
case e := <-r.eventChan: case e := <-r.eventChan:
// event timestamp
now := time.Now()
// if event is nil, continue // if event is nil, continue
if e == nil { if e == nil {
continue continue
@ -348,36 +358,36 @@ func (r *router) processEvents() error {
case table.Update: case table.Update:
penalty = UpdatePenalty penalty = UpdatePenalty
case table.Delete: case table.Delete:
penalty = Delete penalty = DeletePenalty
} }
// we use route hash as eventMap key
// check if we have already registered the route
// we use the route hash as advertMap key
hash := e.Route.Hash() hash := e.Route.Hash()
event, ok := eventMap[hash] advert, ok := advertMap[hash]
if !ok { if !ok {
event = &advertEvent{ events := []*table.Event{e}
Event: e, advert = &routeAdvert{
events: events,
penalty: penalty, penalty: penalty,
timestamp: time.Now(), lastUpdate: time.Now(),
} }
eventMap[hash] = event advertMap[hash] = advert
continue continue
} }
// update penalty for existing event: decay existing and add new penalty
delta := time.Since(event.timestamp).Seconds()
event.penalty = event.penalty*math.Exp(-delta) + penalty
event.timestamp = now
// suppress or recover the event based on its current penalty // attempt to squash last two events if possible
if !event.isSuppressed && event.penalty > AdvertSuppress { lastEvent := advert.events[len(advert.events)-1]
event.isSuppressed = true if lastEvent.Type == e.Type {
} else if event.penalty < AdvertRecover { advert.events[len(advert.events)-1] = e
event.isSuppressed = false } else {
} advert.events = append(advert.events, e)
// if not suppressed decide if if its flapping
if !event.isSuppressed {
// detect if its flapping by comparing current and previous event
event.isFlapping = isFlapping(e, event.Event)
} }
// update event penalty and recorded timestamp
advert.lastUpdate = time.Now()
advert.penalty += penalty
case <-r.exit: case <-r.exit:
// first wait for the advertiser to finish // first wait for the advertiser to finish
r.advertWg.Wait() r.advertWg.Wait()
@ -386,10 +396,6 @@ func (r *router) processEvents() error {
return nil return nil
} }
} }
// we probably never reach this code path
return nil
} }
// watchErrors watches router errors and takes appropriate actions // watchErrors watches router errors and takes appropriate actions
@ -484,8 +490,9 @@ func (r *router) Advertise() (<-chan *Advert, error) {
if err != nil { if err != nil {
return nil, fmt.Errorf("failed creating routing table watcher: %v", err) return nil, fmt.Errorf("failed creating routing table watcher: %v", err)
} }
// service registry watcher
svcWatcher, err := r.opts.Registry.Watch() // registry watcher
regWatcher, err := r.opts.Registry.Watch()
if err != nil { if err != nil {
return nil, fmt.Errorf("failed creating service registry watcher: %v", err) return nil, fmt.Errorf("failed creating service registry watcher: %v", err)
} }
@ -497,7 +504,7 @@ func (r *router) Advertise() (<-chan *Advert, error) {
go func() { go func() {
defer r.wg.Done() defer r.wg.Done()
// watch local registry and register routes in routing table // watch local registry and register routes in routing table
errChan <- r.watchServices(svcWatcher) errChan <- r.watchRegistry(regWatcher)
}() }()
r.wg.Add(1) r.wg.Add(1)
@ -594,5 +601,5 @@ func (r *router) Stop() error {
// String prints debugging information about router // String prints debugging information about router
func (r *router) String() string { func (r *router) String() string {
return "router" return "default"
} }

View File

@ -7,14 +7,14 @@ import (
"github.com/google/uuid" "github.com/google/uuid"
) )
// TableOptions specify routing table options // Options specify routing table options
// TODO: table options TBD in the future // TODO: table options TBD in the future
type TableOptions struct{} type Options struct{}
// table is an in memory routing table // table is an in memory routing table
type table struct { type table struct {
// opts are table options // opts are table options
opts TableOptions opts Options
// m stores routing table map // m stores routing table map
m map[string]map[uint64]Route m map[string]map[uint64]Route
// w is a list of table watchers // w is a list of table watchers
@ -23,9 +23,9 @@ type table struct {
} }
// newTable creates a new routing table and returns it // newTable creates a new routing table and returns it
func newTable(opts ...TableOption) Table { func newTable(opts ...Option) Table {
// default options // default options
var options TableOptions var options Options
// apply requested options // apply requested options
for _, o := range opts { for _, o := range opts {
@ -40,7 +40,7 @@ func newTable(opts ...TableOption) Table {
} }
// Init initializes routing table with options // Init initializes routing table with options
func (t *table) Init(opts ...TableOption) error { func (t *table) Init(opts ...Option) error {
for _, o := range opts { for _, o := range opts {
o(&t.opts) o(&t.opts)
} }
@ -48,7 +48,7 @@ func (t *table) Init(opts ...TableOption) error {
} }
// Options returns routing table options // Options returns routing table options
func (t *table) Options() TableOptions { func (t *table) Options() Options {
return t.opts return t.opts
} }
@ -219,7 +219,7 @@ func (t *table) Size() int {
defer t.RUnlock() defer t.RUnlock()
size := 0 size := 0
for dest, _ := range t.m { for dest := range t.m {
size += len(t.m[dest]) size += len(t.m[dest])
} }
@ -227,6 +227,6 @@ func (t *table) Size() int {
} }
// String returns debug information // String returns debug information
func (t table) String() string { func (t *table) String() string {
return "table" return "default"
} }

View File

@ -23,7 +23,7 @@ func TestCreate(t *testing.T) {
if err := table.Create(route); err != nil { if err := table.Create(route); err != nil {
t.Errorf("error adding route: %s", err) t.Errorf("error adding route: %s", err)
} }
testTableSize += 1 testTableSize++
// adds new route for the original destination // adds new route for the original destination
route.Gateway = "dest.gw2" route.Gateway = "dest.gw2"
@ -31,7 +31,7 @@ func TestCreate(t *testing.T) {
if err := table.Create(route); err != nil { if err := table.Create(route); err != nil {
t.Errorf("error adding route: %s", err) t.Errorf("error adding route: %s", err)
} }
testTableSize += 1 testTableSize++
if table.Size() != testTableSize { if table.Size() != testTableSize {
t.Errorf("invalid number of routes. Expected: %d, found: %d", testTableSize, table.Size()) t.Errorf("invalid number of routes. Expected: %d, found: %d", testTableSize, table.Size())
@ -50,7 +50,7 @@ func TestDelete(t *testing.T) {
if err := table.Create(route); err != nil { if err := table.Create(route); err != nil {
t.Errorf("error adding route: %s", err) t.Errorf("error adding route: %s", err)
} }
testTableSize += 1 testTableSize++
// should fail to delete non-existent route // should fail to delete non-existent route
prevSvc := route.Service prevSvc := route.Service
@ -66,7 +66,7 @@ func TestDelete(t *testing.T) {
if err := table.Delete(route); err != nil { if err := table.Delete(route); err != nil {
t.Errorf("error deleting route: %s", err) t.Errorf("error deleting route: %s", err)
} }
testTableSize -= 1 testTableSize--
if table.Size() != testTableSize { if table.Size() != testTableSize {
t.Errorf("invalid number of routes. Expected: %d, found: %d", testTableSize, table.Size()) t.Errorf("invalid number of routes. Expected: %d, found: %d", testTableSize, table.Size())
@ -80,7 +80,7 @@ func TestUpdate(t *testing.T) {
if err := table.Create(route); err != nil { if err := table.Create(route); err != nil {
t.Errorf("error adding route: %s", err) t.Errorf("error adding route: %s", err)
} }
testTableSize += 1 testTableSize++
// change the metric of the original route // change the metric of the original route
route.Metric = 200 route.Metric = 200
@ -100,7 +100,7 @@ func TestUpdate(t *testing.T) {
if err := table.Update(route); err != nil { if err := table.Update(route); err != nil {
t.Errorf("error updating route: %s", err) t.Errorf("error updating route: %s", err)
} }
testTableSize += 1 testTableSize++
if table.Size() != testTableSize { if table.Size() != testTableSize {
t.Errorf("invalid number of routes. Expected: %d, found: %d", testTableSize, table.Size()) t.Errorf("invalid number of routes. Expected: %d, found: %d", testTableSize, table.Size())

View File

@ -29,10 +29,10 @@ type Table interface {
Size() int Size() int
} }
// TableOption used by the routing table // Option used by the routing table
type TableOption func(*TableOptions) type Option func(*Options)
// NewTable creates new routing table and returns it // NewTable creates new routing table and returns it
func NewTable(opts ...TableOption) Table { func NewTable(opts ...Option) Table {
return newTable(opts...) return newTable(opts...)
} }