From 92870d5d32724d915bb8e3699bc4437425a66140 Mon Sep 17 00:00:00 2001 From: Asim Aslam Date: Fri, 7 Aug 2020 20:53:38 +0100 Subject: [PATCH] Router refresh (#1912) * checkpoint * Refresh and prune routes periodically in the registry router * remove comment --- registry.go | 160 +++++++++++++++++++++++++++++++----------------- table.go | 166 ++++++++++++++++++++++++++++++++++++++++++-------- table_test.go | 11 ++-- 3 files changed, 247 insertions(+), 90 deletions(-) diff --git a/registry.go b/registry.go index 6b47621..5b27a45 100644 --- a/registry.go +++ b/registry.go @@ -14,6 +14,10 @@ import ( ) var ( + // RefreshInterval is the time at which we completely refresh the table + RefreshInterval = time.Second * 120 + // PruneInterval is how often we prune the routing table + PruneInterval = time.Second * 10 // AdvertiseEventsTick is time interval in which the router advertises route updates AdvertiseEventsTick = 10 * time.Second // DefaultAdvertTTL is default advertisement TTL @@ -55,7 +59,7 @@ func NewRouter(opts ...router.Option) router.Router { // create the new table, passing the fetchRoute method in as a fallback if // the table doesn't contain the result for a query. - r.table = newTable(r.fetchRoutes) + r.table = newTable(r.lookup) // start the router r.start() @@ -96,6 +100,20 @@ func (r *rtr) Table() router.Table { return r.table } +func getDomain(srv *registry.Service) string { + // check the service metadata for domain + // TODO: domain as Domain field in registry? + if srv.Metadata != nil && len(srv.Metadata["domain"]) > 0 { + return srv.Metadata["domain"] + } else if len(srv.Nodes) > 0 && srv.Nodes[0].Metadata != nil { + return srv.Nodes[0].Metadata["domain"] + } + + // otherwise return wildcard + // TODO: return GlobalDomain or PublicDomain + return registry.WildcardDomain +} + // manageRoute applies action on a given route func (r *rtr) manageRoute(route router.Route, action string) error { switch action { @@ -118,15 +136,12 @@ func (r *rtr) manageRoute(route router.Route, action string) error { return nil } -// manageServiceRoutes applies action to all routes of the service. -// It returns error of the action fails with error. -func (r *rtr) manageRoutes(service *registry.Service, action, network string) error { - // action is the routing table action - action = strings.ToLower(action) +// createRoutes turns a service into a list routes basically converting nodes to routes +func (r *rtr) createRoutes(service *registry.Service, network string) []router.Route { + var routes []router.Route - // take route action on each service node for _, node := range service.Nodes { - route := router.Route{ + routes = append(routes, router.Route{ Service: service.Name, Address: node.Address, Gateway: "", @@ -135,8 +150,33 @@ func (r *rtr) manageRoutes(service *registry.Service, action, network string) er Link: router.DefaultLink, Metric: router.DefaultLocalMetric, Metadata: node.Metadata, - } + }) + } + return routes +} + +// manageServiceRoutes applies action to all routes of the service. +// It returns error of the action fails with error. +func (r *rtr) manageRoutes(service *registry.Service, action, network string) error { + // action is the routing table action + action = strings.ToLower(action) + + // create a set of routes from the service + routes := r.createRoutes(service, network) + + // if its a delete action and there's no nodes + // it means we need to wipe out all the routes + // for that service + if action == "delete" && len(routes) == 0 { + // delete the service entirely + r.table.deleteService(service.Name, network) + return nil + } + + // create the routes in the table + for _, route := range routes { + logger.Tracef("Creating route %v domain: %v", route, network) if err := r.manageRoute(route, action); err != nil { return err } @@ -147,7 +187,7 @@ func (r *rtr) manageRoutes(service *registry.Service, action, network string) er // manageRegistryRoutes applies action to all routes of each service found in the registry. // It returns error if either the services failed to be listed or the routing table action fails. -func (r *rtr) manageRegistryRoutes(reg registry.Registry, action string) error { +func (r *rtr) loadRoutes(reg registry.Registry) error { services, err := reg.ListServices(registry.ListDomain(registry.WildcardDomain)) if err != nil { return fmt.Errorf("failed listing services: %v", err) @@ -156,20 +196,16 @@ func (r *rtr) manageRegistryRoutes(reg registry.Registry, action string) error { // add each service node as a separate route for _, service := range services { // get the services domain from metadata. Fallback to wildcard. - var domain string + domain := getDomain(service) - if service.Metadata != nil && len(service.Metadata["domain"]) > 0 { - domain = service.Metadata["domain"] - } else { - domain = registry.WildcardDomain - } + // create the routes + routes := r.createRoutes(service, domain) - // we already have nodes - if len(service.Nodes) > 0 { - logger.Tracef("Creating route %v domain: %v", service, domain) - if err := r.manageRoutes(service, action, domain); err != nil { - logger.Tracef("Failed to manage route for %v domain: %v", service, domain) - } + // if the routes exist save them + if len(routes) > 0 { + logger.Tracef("Creating routes for service %v domain: %v", service, domain) + // save the routes without pumping out events + r.table.saveRoutes(service.Name, routes) continue } @@ -184,10 +220,11 @@ func (r *rtr) manageRegistryRoutes(reg registry.Registry, action string) error { // manage the routes for all returned services for _, srv := range srvs { - logger.Tracef("Creating route %v domain: %v", srv, domain) - if err := r.manageRoutes(srv, action, domain); err != nil { - logger.Tracef("Failed to manage route for %v domain: %v", srv, domain) - continue + routes := r.createRoutes(srv, domain) + + if len(routes) > 0 { + logger.Tracef("Creating routes for service %v domain: %v", srv, domain) + r.table.saveRoutes(srv.Name, routes) } } } @@ -195,40 +232,29 @@ func (r *rtr) manageRegistryRoutes(reg registry.Registry, action string) error { return nil } -// fetchRoutes retrieves all the routes for a given service and creates them in the routing table -func (r *rtr) fetchRoutes(service string) error { +// lookup retrieves all the routes for a given service and creates them in the routing table +func (r *rtr) lookup(service string) ([]router.Route, error) { logger.Tracef("Fetching route for %s domain: %v", service, registry.WildcardDomain) + services, err := r.options.Registry.GetService(service, registry.GetDomain(registry.WildcardDomain)) if err == registry.ErrNotFound { logger.Tracef("Failed to find route for %s", service) - return nil + return nil, router.ErrRouteNotFound } else if err != nil { logger.Tracef("Failed to find route for %s: %v", service, err) - return fmt.Errorf("failed getting services: %v", err) + return nil, fmt.Errorf("failed getting services: %v", err) } + var routes []router.Route + for _, srv := range services { - var domain string - - // since a wildcard query was performed, the service could belong - // to one of many namespaces, to get this information we check - // the node metadata. TODO: Add Domain to registry.Service - if srv.Metadata != nil && len(srv.Metadata["domain"]) > 0 { - domain = srv.Metadata["domain"] - } else if len(srv.Nodes) > 0 && srv.Nodes[0].Metadata != nil { - domain = srv.Nodes[0].Metadata["domain"] - } else { - domain = registry.WildcardDomain - } - - logger.Tracef("Creating route %v domain: %v", srv, domain) - if err := r.manageRoutes(srv, "create", domain); err != nil { - logger.Tracef("Failed to create route for %v domain %v: %v", err) - continue - } + domain := getDomain(srv) + // TODO: should we continue to send the event indicating we created a route? + // lookup is only called in the query path so probably not + routes = append(routes, r.createRoutes(srv, domain)...) } - return nil + return routes, nil } // watchRegistry watches registry and updates routing table based on the received events. @@ -252,6 +278,7 @@ func (r *rtr) watchRegistry(w registry.Watcher) error { }() for { + // get the next service res, err := w.Next() if err != nil { if err != registry.ErrWatcherStopped { @@ -260,18 +287,15 @@ func (r *rtr) watchRegistry(w registry.Watcher) error { break } + // don't process nil entries if res.Service == nil { continue } // get the services domain from metadata. Fallback to wildcard. - var domain string - if res.Service.Metadata != nil && len(res.Service.Metadata["domain"]) > 0 { - domain = res.Service.Metadata["domain"] - } else { - domain = registry.WildcardDomain - } + domain := getDomain(res.Service) + // create/update or delete the route if err := r.manageRoutes(res.Service, res.Action, domain); err != nil { return err } @@ -496,8 +520,8 @@ func (r *rtr) start() error { if r.options.Precache { // add all local service routes into the routing table - if err := r.manageRegistryRoutes(r.options.Registry, "create"); err != nil { - return fmt.Errorf("failed adding registry routes: %s", err) + if err := r.loadRoutes(r.options.Registry); err != nil { + return fmt.Errorf("failed loading registry routes: %s", err) } } @@ -521,6 +545,28 @@ func (r *rtr) start() error { // create error and exit channels r.exit = make(chan bool) + // periodically refresh all the routes + go func() { + t1 := time.NewTicker(RefreshInterval) + defer t1.Stop() + + t2 := time.NewTicker(PruneInterval) + defer t2.Stop() + + for { + select { + case <-r.exit: + return + case <-t2.C: + r.table.pruneRoutes(RefreshInterval) + case <-t1.C: + if err := r.loadRoutes(r.options.Registry); err != nil { + logger.Errorf("failed refreshing registry routes: %s", err) + } + } + } + }() + go func() { var err error var w registry.Watcher diff --git a/table.go b/table.go index 5bfee76..d695604 100644 --- a/table.go +++ b/table.go @@ -12,20 +12,98 @@ import ( // table is an in-memory routing table type table struct { sync.RWMutex - // fetchRoutes for a service - fetchRoutes func(string) error + // lookup for a service + lookup func(string) ([]router.Route, error) // routes stores service routes - routes map[string]map[uint64]router.Route + routes map[string]map[uint64]*route // watchers stores table watchers watchers map[string]*tableWatcher } +type route struct { + route router.Route + updated time.Time +} + // newtable creates a new routing table and returns it -func newTable(fetchRoutes func(string) error, opts ...router.Option) *table { +func newTable(lookup func(string) ([]router.Route, error), opts ...router.Option) *table { return &table{ - fetchRoutes: fetchRoutes, - routes: make(map[string]map[uint64]router.Route), - watchers: make(map[string]*tableWatcher), + lookup: lookup, + routes: make(map[string]map[uint64]*route), + watchers: make(map[string]*tableWatcher), + } +} + +// pruneRoutes will prune routes older than the time specified +func (t *table) pruneRoutes(olderThan time.Duration) { + var routes []router.Route + + t.Lock() + + // search for all the routes + for _, routeList := range t.routes { + for _, r := range routeList { + // if any route is older than + if time.Since(r.updated).Seconds() > olderThan.Seconds() { + routes = append(routes, r.route) + } + } + } + + t.Unlock() + + // delete the routes we've found + for _, route := range routes { + t.Delete(route) + } +} + +// deleteService removes the entire service +func (t *table) deleteService(service, network string) { + t.Lock() + + routes, ok := t.routes[service] + if !ok { + return + } + + // delete the routes for the service + for hash, rt := range routes { + // TODO: check if this causes a problem + // with * in the network if that is a thing + // or blank strings + if rt.route.Network != network { + continue + } + delete(routes, hash) + } + + // delete the map for the service if its empty + if len(routes) == 0 { + delete(t.routes, service) + return + } + + // save the routes + t.routes[service] = routes + + t.Unlock() +} + +// saveRoutes completely replaces the routes for a service +func (t *table) saveRoutes(service string, routes []router.Route) { + t.Lock() + defer t.Unlock() + + // delete old routes + delete(t.routes, service) + // make new map + t.routes[service] = make(map[uint64]*route) + + // iterate through new routes and save + for _, rt := range routes { + // save the new route + t.routes[service][rt.Hash()] = &route{rt, time.Now()} } } @@ -58,20 +136,25 @@ func (t *table) Create(r router.Route) error { // check if there are any routes in the table for the route destination if _, ok := t.routes[service]; !ok { - t.routes[service] = make(map[uint64]router.Route) + t.routes[service] = make(map[uint64]*route) } // add new route to the table for the route destination - if _, ok := t.routes[service][sum]; !ok { - t.routes[service][sum] = r - if logger.V(logger.DebugLevel, logger.DefaultLogger) { - logger.Debugf("Router emitting %s for route: %s", router.Create, r.Address) - } - go t.sendEvent(&router.Event{Type: router.Create, Timestamp: time.Now(), Route: r}) - return nil + if _, ok := t.routes[service][sum]; ok { + return router.ErrDuplicateRoute } - return router.ErrDuplicateRoute + // create the route + t.routes[service][sum] = &route{r, time.Now()} + + if logger.V(logger.DebugLevel, logger.DefaultLogger) { + logger.Debugf("Router emitting %s for route: %s", router.Create, r.Address) + } + + // send a route created event + go t.sendEvent(&router.Event{Type: router.Create, Timestamp: time.Now(), Route: r}) + + return nil } // Delete deletes the route from the routing table @@ -90,10 +173,14 @@ func (t *table) Delete(r router.Route) error { return router.ErrRouteNotFound } + // delete the route from the service delete(t.routes[service], sum) + + // delete the whole map if there are no routes left if len(t.routes[service]) == 0 { delete(t.routes, service) } + if logger.V(logger.DebugLevel, logger.DefaultLogger) { logger.Debugf("Router emitting %s for route: %s", router.Delete, r.Address) } @@ -112,11 +199,13 @@ func (t *table) Update(r router.Route) error { // check if the route destination has any routes in the table if _, ok := t.routes[service]; !ok { - t.routes[service] = make(map[uint64]router.Route) + t.routes[service] = make(map[uint64]*route) } if _, ok := t.routes[service][sum]; !ok { - t.routes[service][sum] = r + // update the route + t.routes[service][sum] = &route{r, time.Now()} + if logger.V(logger.DebugLevel, logger.DefaultLogger) { logger.Debugf("Router emitting %s for route: %s", router.Update, r.Address) } @@ -125,7 +214,7 @@ func (t *table) Update(r router.Route) error { } // just update the route, but dont emit Update event - t.routes[service][sum] = r + t.routes[service][sum] = &route{r, time.Now()} return nil } @@ -138,7 +227,7 @@ func (t *table) List() ([]router.Route, error) { var routes []router.Route for _, rmap := range t.routes { for _, route := range rmap { - routes = append(routes, route) + routes = append(routes, route.route) } } @@ -187,12 +276,21 @@ func isMatch(route router.Route, address, gateway, network, rtr string, strategy return true } -// findRoutes finds all the routes for given network and router and returns them -func findRoutes(routes map[uint64]router.Route, address, gateway, network, rtr string, strategy router.Strategy) []router.Route { +// filterRoutes finds all the routes for given network and router and returns them +func filterRoutes(routes map[uint64]*route, opts router.QueryOptions) []router.Route { + address := opts.Address + gateway := opts.Gateway + network := opts.Network + rtr := opts.Router + strategy := opts.Strategy + // routeMap stores the routes we're going to advertise routeMap := make(map[string][]router.Route) - for _, route := range routes { + for _, rt := range routes { + // get the actual route + route := rt.route + if isMatch(route, address, gateway, network, rtr, strategy) { // add matchihg route to the routeMap routeKey := route.Service + "@" + route.Network @@ -254,7 +352,7 @@ func (t *table) Query(q ...router.QueryOption) ([]router.Route, error) { return nil, false } - return findRoutes(routes, q.Address, q.Gateway, q.Network, q.Router, q.Strategy), true + return filterRoutes(routes, q), true } if opts.Service != "*" { @@ -263,11 +361,17 @@ func (t *table) Query(q ...router.QueryOption) ([]router.Route, error) { return routes, nil } - // load the cache and try again - if err := t.fetchRoutes(opts.Service); err != nil { + // lookup the route and try again + // TODO: move this logic out of the hot path + // being hammered on queries will require multiple lookups + routes, err := t.lookup(opts.Service) + if err != nil { return nil, err } + // cache the routes + t.saveRoutes(opts.Service, routes) + // try again if routes, ok := readAndFilter(opts); ok { return routes, nil @@ -278,9 +382,17 @@ func (t *table) Query(q ...router.QueryOption) ([]router.Route, error) { // search through all destinations t.RLock() + for _, routes := range t.routes { - results = append(results, findRoutes(routes, opts.Address, opts.Gateway, opts.Network, opts.Router, opts.Strategy)...) + // filter the routes + found := filterRoutes(routes, opts) + // ensure we don't append zero length routes + if len(found) == 0 { + continue + } + results = append(results, found...) } + t.RUnlock() return results, nil diff --git a/table_test.go b/table_test.go index 5e2590f..4fa4131 100644 --- a/table_test.go +++ b/table_test.go @@ -9,7 +9,7 @@ import ( func testSetup() (*table, router.Route) { routr := NewRouter().(*rtr) - table := newTable(routr.fetchRoutes) + table := newTable(routr.lookup) route := router.Route{ Service: "dest.svc", @@ -304,9 +304,8 @@ func TestFallback(t *testing.T) { Link: router.DefaultLink, Metric: router.DefaultLocalMetric, } - r.table = newTable(func(s string) error { - r.table.Create(route) - return nil + r.table = newTable(func(s string) ([]router.Route, error) { + return []router.Route{route}, nil }) r.start() @@ -338,8 +337,8 @@ func TestFallbackError(t *testing.T) { subscribers: make(map[string]chan *router.Advert), options: router.DefaultOptions(), } - r.table = newTable(func(s string) error { - return fmt.Errorf("ERROR") + r.table = newTable(func(s string) ([]router.Route, error) { + return nil, fmt.Errorf("ERROR") }) r.start() _, err := r.Lookup(router.QueryService("go.micro.service.foo"))