Router refresh (#1912)
* checkpoint * Refresh and prune routes periodically in the registry router * remove comment
This commit is contained in:
parent
d69a4a30cd
commit
324c4e6886
@ -14,6 +14,10 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
// RefreshInterval is the time at which we completely refresh the table
|
||||
RefreshInterval = time.Second * 120
|
||||
// PruneInterval is how often we prune the routing table
|
||||
PruneInterval = time.Second * 10
|
||||
// AdvertiseEventsTick is time interval in which the router advertises route updates
|
||||
AdvertiseEventsTick = 10 * time.Second
|
||||
// DefaultAdvertTTL is default advertisement TTL
|
||||
@ -55,7 +59,7 @@ func NewRouter(opts ...router.Option) router.Router {
|
||||
|
||||
// create the new table, passing the fetchRoute method in as a fallback if
|
||||
// the table doesn't contain the result for a query.
|
||||
r.table = newTable(r.fetchRoutes)
|
||||
r.table = newTable(r.lookup)
|
||||
|
||||
// start the router
|
||||
r.start()
|
||||
@ -96,6 +100,20 @@ func (r *rtr) Table() router.Table {
|
||||
return r.table
|
||||
}
|
||||
|
||||
func getDomain(srv *registry.Service) string {
|
||||
// check the service metadata for domain
|
||||
// TODO: domain as Domain field in registry?
|
||||
if srv.Metadata != nil && len(srv.Metadata["domain"]) > 0 {
|
||||
return srv.Metadata["domain"]
|
||||
} else if len(srv.Nodes) > 0 && srv.Nodes[0].Metadata != nil {
|
||||
return srv.Nodes[0].Metadata["domain"]
|
||||
}
|
||||
|
||||
// otherwise return wildcard
|
||||
// TODO: return GlobalDomain or PublicDomain
|
||||
return registry.WildcardDomain
|
||||
}
|
||||
|
||||
// manageRoute applies action on a given route
|
||||
func (r *rtr) manageRoute(route router.Route, action string) error {
|
||||
switch action {
|
||||
@ -118,15 +136,12 @@ func (r *rtr) manageRoute(route router.Route, action string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// manageServiceRoutes applies action to all routes of the service.
|
||||
// It returns error of the action fails with error.
|
||||
func (r *rtr) manageRoutes(service *registry.Service, action, network string) error {
|
||||
// action is the routing table action
|
||||
action = strings.ToLower(action)
|
||||
// createRoutes turns a service into a list routes basically converting nodes to routes
|
||||
func (r *rtr) createRoutes(service *registry.Service, network string) []router.Route {
|
||||
var routes []router.Route
|
||||
|
||||
// take route action on each service node
|
||||
for _, node := range service.Nodes {
|
||||
route := router.Route{
|
||||
routes = append(routes, router.Route{
|
||||
Service: service.Name,
|
||||
Address: node.Address,
|
||||
Gateway: "",
|
||||
@ -135,8 +150,33 @@ func (r *rtr) manageRoutes(service *registry.Service, action, network string) er
|
||||
Link: router.DefaultLink,
|
||||
Metric: router.DefaultLocalMetric,
|
||||
Metadata: node.Metadata,
|
||||
})
|
||||
}
|
||||
|
||||
return routes
|
||||
}
|
||||
|
||||
// manageServiceRoutes applies action to all routes of the service.
|
||||
// It returns error of the action fails with error.
|
||||
func (r *rtr) manageRoutes(service *registry.Service, action, network string) error {
|
||||
// action is the routing table action
|
||||
action = strings.ToLower(action)
|
||||
|
||||
// create a set of routes from the service
|
||||
routes := r.createRoutes(service, network)
|
||||
|
||||
// if its a delete action and there's no nodes
|
||||
// it means we need to wipe out all the routes
|
||||
// for that service
|
||||
if action == "delete" && len(routes) == 0 {
|
||||
// delete the service entirely
|
||||
r.table.deleteService(service.Name, network)
|
||||
return nil
|
||||
}
|
||||
|
||||
// create the routes in the table
|
||||
for _, route := range routes {
|
||||
logger.Tracef("Creating route %v domain: %v", route, network)
|
||||
if err := r.manageRoute(route, action); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -147,7 +187,7 @@ func (r *rtr) manageRoutes(service *registry.Service, action, network string) er
|
||||
|
||||
// manageRegistryRoutes applies action to all routes of each service found in the registry.
|
||||
// It returns error if either the services failed to be listed or the routing table action fails.
|
||||
func (r *rtr) manageRegistryRoutes(reg registry.Registry, action string) error {
|
||||
func (r *rtr) loadRoutes(reg registry.Registry) error {
|
||||
services, err := reg.ListServices(registry.ListDomain(registry.WildcardDomain))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed listing services: %v", err)
|
||||
@ -156,20 +196,16 @@ func (r *rtr) manageRegistryRoutes(reg registry.Registry, action string) error {
|
||||
// add each service node as a separate route
|
||||
for _, service := range services {
|
||||
// get the services domain from metadata. Fallback to wildcard.
|
||||
var domain string
|
||||
domain := getDomain(service)
|
||||
|
||||
if service.Metadata != nil && len(service.Metadata["domain"]) > 0 {
|
||||
domain = service.Metadata["domain"]
|
||||
} else {
|
||||
domain = registry.WildcardDomain
|
||||
}
|
||||
// create the routes
|
||||
routes := r.createRoutes(service, domain)
|
||||
|
||||
// we already have nodes
|
||||
if len(service.Nodes) > 0 {
|
||||
logger.Tracef("Creating route %v domain: %v", service, domain)
|
||||
if err := r.manageRoutes(service, action, domain); err != nil {
|
||||
logger.Tracef("Failed to manage route for %v domain: %v", service, domain)
|
||||
}
|
||||
// if the routes exist save them
|
||||
if len(routes) > 0 {
|
||||
logger.Tracef("Creating routes for service %v domain: %v", service, domain)
|
||||
// save the routes without pumping out events
|
||||
r.table.saveRoutes(service.Name, routes)
|
||||
continue
|
||||
}
|
||||
|
||||
@ -184,10 +220,11 @@ func (r *rtr) manageRegistryRoutes(reg registry.Registry, action string) error {
|
||||
|
||||
// manage the routes for all returned services
|
||||
for _, srv := range srvs {
|
||||
logger.Tracef("Creating route %v domain: %v", srv, domain)
|
||||
if err := r.manageRoutes(srv, action, domain); err != nil {
|
||||
logger.Tracef("Failed to manage route for %v domain: %v", srv, domain)
|
||||
continue
|
||||
routes := r.createRoutes(srv, domain)
|
||||
|
||||
if len(routes) > 0 {
|
||||
logger.Tracef("Creating routes for service %v domain: %v", srv, domain)
|
||||
r.table.saveRoutes(srv.Name, routes)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -195,40 +232,29 @@ func (r *rtr) manageRegistryRoutes(reg registry.Registry, action string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// fetchRoutes retrieves all the routes for a given service and creates them in the routing table
|
||||
func (r *rtr) fetchRoutes(service string) error {
|
||||
// lookup retrieves all the routes for a given service and creates them in the routing table
|
||||
func (r *rtr) lookup(service string) ([]router.Route, error) {
|
||||
logger.Tracef("Fetching route for %s domain: %v", service, registry.WildcardDomain)
|
||||
|
||||
services, err := r.options.Registry.GetService(service, registry.GetDomain(registry.WildcardDomain))
|
||||
if err == registry.ErrNotFound {
|
||||
logger.Tracef("Failed to find route for %s", service)
|
||||
return nil
|
||||
return nil, router.ErrRouteNotFound
|
||||
} else if err != nil {
|
||||
logger.Tracef("Failed to find route for %s: %v", service, err)
|
||||
return fmt.Errorf("failed getting services: %v", err)
|
||||
return nil, fmt.Errorf("failed getting services: %v", err)
|
||||
}
|
||||
|
||||
var routes []router.Route
|
||||
|
||||
for _, srv := range services {
|
||||
var domain string
|
||||
|
||||
// since a wildcard query was performed, the service could belong
|
||||
// to one of many namespaces, to get this information we check
|
||||
// the node metadata. TODO: Add Domain to registry.Service
|
||||
if srv.Metadata != nil && len(srv.Metadata["domain"]) > 0 {
|
||||
domain = srv.Metadata["domain"]
|
||||
} else if len(srv.Nodes) > 0 && srv.Nodes[0].Metadata != nil {
|
||||
domain = srv.Nodes[0].Metadata["domain"]
|
||||
} else {
|
||||
domain = registry.WildcardDomain
|
||||
domain := getDomain(srv)
|
||||
// TODO: should we continue to send the event indicating we created a route?
|
||||
// lookup is only called in the query path so probably not
|
||||
routes = append(routes, r.createRoutes(srv, domain)...)
|
||||
}
|
||||
|
||||
logger.Tracef("Creating route %v domain: %v", srv, domain)
|
||||
if err := r.manageRoutes(srv, "create", domain); err != nil {
|
||||
logger.Tracef("Failed to create route for %v domain %v: %v", err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
return routes, nil
|
||||
}
|
||||
|
||||
// watchRegistry watches registry and updates routing table based on the received events.
|
||||
@ -252,6 +278,7 @@ func (r *rtr) watchRegistry(w registry.Watcher) error {
|
||||
}()
|
||||
|
||||
for {
|
||||
// get the next service
|
||||
res, err := w.Next()
|
||||
if err != nil {
|
||||
if err != registry.ErrWatcherStopped {
|
||||
@ -260,18 +287,15 @@ func (r *rtr) watchRegistry(w registry.Watcher) error {
|
||||
break
|
||||
}
|
||||
|
||||
// don't process nil entries
|
||||
if res.Service == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// get the services domain from metadata. Fallback to wildcard.
|
||||
var domain string
|
||||
if res.Service.Metadata != nil && len(res.Service.Metadata["domain"]) > 0 {
|
||||
domain = res.Service.Metadata["domain"]
|
||||
} else {
|
||||
domain = registry.WildcardDomain
|
||||
}
|
||||
domain := getDomain(res.Service)
|
||||
|
||||
// create/update or delete the route
|
||||
if err := r.manageRoutes(res.Service, res.Action, domain); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -496,8 +520,8 @@ func (r *rtr) start() error {
|
||||
|
||||
if r.options.Precache {
|
||||
// add all local service routes into the routing table
|
||||
if err := r.manageRegistryRoutes(r.options.Registry, "create"); err != nil {
|
||||
return fmt.Errorf("failed adding registry routes: %s", err)
|
||||
if err := r.loadRoutes(r.options.Registry); err != nil {
|
||||
return fmt.Errorf("failed loading registry routes: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
@ -521,6 +545,28 @@ func (r *rtr) start() error {
|
||||
// create error and exit channels
|
||||
r.exit = make(chan bool)
|
||||
|
||||
// periodically refresh all the routes
|
||||
go func() {
|
||||
t1 := time.NewTicker(RefreshInterval)
|
||||
defer t1.Stop()
|
||||
|
||||
t2 := time.NewTicker(PruneInterval)
|
||||
defer t2.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-r.exit:
|
||||
return
|
||||
case <-t2.C:
|
||||
r.table.pruneRoutes(RefreshInterval)
|
||||
case <-t1.C:
|
||||
if err := r.loadRoutes(r.options.Registry); err != nil {
|
||||
logger.Errorf("failed refreshing registry routes: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
go func() {
|
||||
var err error
|
||||
var w registry.Watcher
|
||||
|
@ -12,23 +12,101 @@ import (
|
||||
// table is an in-memory routing table
|
||||
type table struct {
|
||||
sync.RWMutex
|
||||
// fetchRoutes for a service
|
||||
fetchRoutes func(string) error
|
||||
// lookup for a service
|
||||
lookup func(string) ([]router.Route, error)
|
||||
// routes stores service routes
|
||||
routes map[string]map[uint64]router.Route
|
||||
routes map[string]map[uint64]*route
|
||||
// watchers stores table watchers
|
||||
watchers map[string]*tableWatcher
|
||||
}
|
||||
|
||||
type route struct {
|
||||
route router.Route
|
||||
updated time.Time
|
||||
}
|
||||
|
||||
// newtable creates a new routing table and returns it
|
||||
func newTable(fetchRoutes func(string) error, opts ...router.Option) *table {
|
||||
func newTable(lookup func(string) ([]router.Route, error), opts ...router.Option) *table {
|
||||
return &table{
|
||||
fetchRoutes: fetchRoutes,
|
||||
routes: make(map[string]map[uint64]router.Route),
|
||||
lookup: lookup,
|
||||
routes: make(map[string]map[uint64]*route),
|
||||
watchers: make(map[string]*tableWatcher),
|
||||
}
|
||||
}
|
||||
|
||||
// pruneRoutes will prune routes older than the time specified
|
||||
func (t *table) pruneRoutes(olderThan time.Duration) {
|
||||
var routes []router.Route
|
||||
|
||||
t.Lock()
|
||||
|
||||
// search for all the routes
|
||||
for _, routeList := range t.routes {
|
||||
for _, r := range routeList {
|
||||
// if any route is older than
|
||||
if time.Since(r.updated).Seconds() > olderThan.Seconds() {
|
||||
routes = append(routes, r.route)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
t.Unlock()
|
||||
|
||||
// delete the routes we've found
|
||||
for _, route := range routes {
|
||||
t.Delete(route)
|
||||
}
|
||||
}
|
||||
|
||||
// deleteService removes the entire service
|
||||
func (t *table) deleteService(service, network string) {
|
||||
t.Lock()
|
||||
|
||||
routes, ok := t.routes[service]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
// delete the routes for the service
|
||||
for hash, rt := range routes {
|
||||
// TODO: check if this causes a problem
|
||||
// with * in the network if that is a thing
|
||||
// or blank strings
|
||||
if rt.route.Network != network {
|
||||
continue
|
||||
}
|
||||
delete(routes, hash)
|
||||
}
|
||||
|
||||
// delete the map for the service if its empty
|
||||
if len(routes) == 0 {
|
||||
delete(t.routes, service)
|
||||
return
|
||||
}
|
||||
|
||||
// save the routes
|
||||
t.routes[service] = routes
|
||||
|
||||
t.Unlock()
|
||||
}
|
||||
|
||||
// saveRoutes completely replaces the routes for a service
|
||||
func (t *table) saveRoutes(service string, routes []router.Route) {
|
||||
t.Lock()
|
||||
defer t.Unlock()
|
||||
|
||||
// delete old routes
|
||||
delete(t.routes, service)
|
||||
// make new map
|
||||
t.routes[service] = make(map[uint64]*route)
|
||||
|
||||
// iterate through new routes and save
|
||||
for _, rt := range routes {
|
||||
// save the new route
|
||||
t.routes[service][rt.Hash()] = &route{rt, time.Now()}
|
||||
}
|
||||
}
|
||||
|
||||
// sendEvent sends events to all subscribed watchers
|
||||
func (t *table) sendEvent(e *router.Event) {
|
||||
t.RLock()
|
||||
@ -58,20 +136,25 @@ func (t *table) Create(r router.Route) error {
|
||||
|
||||
// check if there are any routes in the table for the route destination
|
||||
if _, ok := t.routes[service]; !ok {
|
||||
t.routes[service] = make(map[uint64]router.Route)
|
||||
t.routes[service] = make(map[uint64]*route)
|
||||
}
|
||||
|
||||
// add new route to the table for the route destination
|
||||
if _, ok := t.routes[service][sum]; !ok {
|
||||
t.routes[service][sum] = r
|
||||
if _, ok := t.routes[service][sum]; ok {
|
||||
return router.ErrDuplicateRoute
|
||||
}
|
||||
|
||||
// create the route
|
||||
t.routes[service][sum] = &route{r, time.Now()}
|
||||
|
||||
if logger.V(logger.DebugLevel, logger.DefaultLogger) {
|
||||
logger.Debugf("Router emitting %s for route: %s", router.Create, r.Address)
|
||||
}
|
||||
go t.sendEvent(&router.Event{Type: router.Create, Timestamp: time.Now(), Route: r})
|
||||
return nil
|
||||
}
|
||||
|
||||
return router.ErrDuplicateRoute
|
||||
// send a route created event
|
||||
go t.sendEvent(&router.Event{Type: router.Create, Timestamp: time.Now(), Route: r})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete deletes the route from the routing table
|
||||
@ -90,10 +173,14 @@ func (t *table) Delete(r router.Route) error {
|
||||
return router.ErrRouteNotFound
|
||||
}
|
||||
|
||||
// delete the route from the service
|
||||
delete(t.routes[service], sum)
|
||||
|
||||
// delete the whole map if there are no routes left
|
||||
if len(t.routes[service]) == 0 {
|
||||
delete(t.routes, service)
|
||||
}
|
||||
|
||||
if logger.V(logger.DebugLevel, logger.DefaultLogger) {
|
||||
logger.Debugf("Router emitting %s for route: %s", router.Delete, r.Address)
|
||||
}
|
||||
@ -112,11 +199,13 @@ func (t *table) Update(r router.Route) error {
|
||||
|
||||
// check if the route destination has any routes in the table
|
||||
if _, ok := t.routes[service]; !ok {
|
||||
t.routes[service] = make(map[uint64]router.Route)
|
||||
t.routes[service] = make(map[uint64]*route)
|
||||
}
|
||||
|
||||
if _, ok := t.routes[service][sum]; !ok {
|
||||
t.routes[service][sum] = r
|
||||
// update the route
|
||||
t.routes[service][sum] = &route{r, time.Now()}
|
||||
|
||||
if logger.V(logger.DebugLevel, logger.DefaultLogger) {
|
||||
logger.Debugf("Router emitting %s for route: %s", router.Update, r.Address)
|
||||
}
|
||||
@ -125,7 +214,7 @@ func (t *table) Update(r router.Route) error {
|
||||
}
|
||||
|
||||
// just update the route, but dont emit Update event
|
||||
t.routes[service][sum] = r
|
||||
t.routes[service][sum] = &route{r, time.Now()}
|
||||
|
||||
return nil
|
||||
}
|
||||
@ -138,7 +227,7 @@ func (t *table) List() ([]router.Route, error) {
|
||||
var routes []router.Route
|
||||
for _, rmap := range t.routes {
|
||||
for _, route := range rmap {
|
||||
routes = append(routes, route)
|
||||
routes = append(routes, route.route)
|
||||
}
|
||||
}
|
||||
|
||||
@ -187,12 +276,21 @@ func isMatch(route router.Route, address, gateway, network, rtr string, strategy
|
||||
return true
|
||||
}
|
||||
|
||||
// findRoutes finds all the routes for given network and router and returns them
|
||||
func findRoutes(routes map[uint64]router.Route, address, gateway, network, rtr string, strategy router.Strategy) []router.Route {
|
||||
// filterRoutes finds all the routes for given network and router and returns them
|
||||
func filterRoutes(routes map[uint64]*route, opts router.QueryOptions) []router.Route {
|
||||
address := opts.Address
|
||||
gateway := opts.Gateway
|
||||
network := opts.Network
|
||||
rtr := opts.Router
|
||||
strategy := opts.Strategy
|
||||
|
||||
// routeMap stores the routes we're going to advertise
|
||||
routeMap := make(map[string][]router.Route)
|
||||
|
||||
for _, route := range routes {
|
||||
for _, rt := range routes {
|
||||
// get the actual route
|
||||
route := rt.route
|
||||
|
||||
if isMatch(route, address, gateway, network, rtr, strategy) {
|
||||
// add matchihg route to the routeMap
|
||||
routeKey := route.Service + "@" + route.Network
|
||||
@ -254,7 +352,7 @@ func (t *table) Query(q ...router.QueryOption) ([]router.Route, error) {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
return findRoutes(routes, q.Address, q.Gateway, q.Network, q.Router, q.Strategy), true
|
||||
return filterRoutes(routes, q), true
|
||||
}
|
||||
|
||||
if opts.Service != "*" {
|
||||
@ -263,11 +361,17 @@ func (t *table) Query(q ...router.QueryOption) ([]router.Route, error) {
|
||||
return routes, nil
|
||||
}
|
||||
|
||||
// load the cache and try again
|
||||
if err := t.fetchRoutes(opts.Service); err != nil {
|
||||
// lookup the route and try again
|
||||
// TODO: move this logic out of the hot path
|
||||
// being hammered on queries will require multiple lookups
|
||||
routes, err := t.lookup(opts.Service)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// cache the routes
|
||||
t.saveRoutes(opts.Service, routes)
|
||||
|
||||
// try again
|
||||
if routes, ok := readAndFilter(opts); ok {
|
||||
return routes, nil
|
||||
@ -278,9 +382,17 @@ func (t *table) Query(q ...router.QueryOption) ([]router.Route, error) {
|
||||
|
||||
// search through all destinations
|
||||
t.RLock()
|
||||
|
||||
for _, routes := range t.routes {
|
||||
results = append(results, findRoutes(routes, opts.Address, opts.Gateway, opts.Network, opts.Router, opts.Strategy)...)
|
||||
// filter the routes
|
||||
found := filterRoutes(routes, opts)
|
||||
// ensure we don't append zero length routes
|
||||
if len(found) == 0 {
|
||||
continue
|
||||
}
|
||||
results = append(results, found...)
|
||||
}
|
||||
|
||||
t.RUnlock()
|
||||
|
||||
return results, nil
|
||||
|
@ -9,7 +9,7 @@ import (
|
||||
|
||||
func testSetup() (*table, router.Route) {
|
||||
routr := NewRouter().(*rtr)
|
||||
table := newTable(routr.fetchRoutes)
|
||||
table := newTable(routr.lookup)
|
||||
|
||||
route := router.Route{
|
||||
Service: "dest.svc",
|
||||
@ -304,9 +304,8 @@ func TestFallback(t *testing.T) {
|
||||
Link: router.DefaultLink,
|
||||
Metric: router.DefaultLocalMetric,
|
||||
}
|
||||
r.table = newTable(func(s string) error {
|
||||
r.table.Create(route)
|
||||
return nil
|
||||
r.table = newTable(func(s string) ([]router.Route, error) {
|
||||
return []router.Route{route}, nil
|
||||
})
|
||||
r.start()
|
||||
|
||||
@ -338,8 +337,8 @@ func TestFallbackError(t *testing.T) {
|
||||
subscribers: make(map[string]chan *router.Advert),
|
||||
options: router.DefaultOptions(),
|
||||
}
|
||||
r.table = newTable(func(s string) error {
|
||||
return fmt.Errorf("ERROR")
|
||||
r.table = newTable(func(s string) ([]router.Route, error) {
|
||||
return nil, fmt.Errorf("ERROR")
|
||||
})
|
||||
r.start()
|
||||
_, err := r.Lookup(router.QueryService("go.micro.service.foo"))
|
||||
|
Loading…
Reference in New Issue
Block a user