Cleanup and speedup network convergence along with direct messaging for connect and solicit
This commit is contained in:
@@ -71,9 +71,18 @@ type network struct {
|
||||
connected bool
|
||||
// closed closes the network
|
||||
closed chan bool
|
||||
// whether we've announced the first connect successfully
|
||||
// and received back some sort of peer message
|
||||
announced chan bool
|
||||
// whether we've discovered by the network
|
||||
discovered chan bool
|
||||
// solicted checks whether routes were solicited by one node
|
||||
solicited chan string
|
||||
}
|
||||
|
||||
// message is network message
|
||||
type message struct {
|
||||
// msg is transport message
|
||||
msg *transport.Message
|
||||
// session is tunnel session
|
||||
session tunnel.Session
|
||||
}
|
||||
|
||||
// newNetwork returns a new network node
|
||||
@@ -149,14 +158,16 @@ func newNetwork(opts ...Option) Network {
|
||||
address: peerAddress,
|
||||
peers: make(map[string]*node),
|
||||
},
|
||||
options: options,
|
||||
router: options.Router,
|
||||
proxy: options.Proxy,
|
||||
tunnel: options.Tunnel,
|
||||
server: server,
|
||||
client: client,
|
||||
tunClient: make(map[string]transport.Client),
|
||||
peerLinks: make(map[string]tunnel.Link),
|
||||
options: options,
|
||||
router: options.Router,
|
||||
proxy: options.Proxy,
|
||||
tunnel: options.Tunnel,
|
||||
server: server,
|
||||
client: client,
|
||||
tunClient: make(map[string]transport.Client),
|
||||
peerLinks: make(map[string]tunnel.Link),
|
||||
discovered: make(chan bool, 1),
|
||||
solicited: make(chan string, 1),
|
||||
}
|
||||
|
||||
network.node.network = network
|
||||
@@ -278,6 +289,14 @@ func (n *network) handleNetConn(s tunnel.Session, msg chan *message) {
|
||||
continue
|
||||
}
|
||||
|
||||
// check if peer is set
|
||||
peer := m.Header["Micro-Peer"]
|
||||
|
||||
// check who the message is intended for
|
||||
if len(peer) > 0 && peer != n.options.Id {
|
||||
continue
|
||||
}
|
||||
|
||||
select {
|
||||
case msg <- &message{
|
||||
msg: m,
|
||||
@@ -367,26 +386,33 @@ func (n *network) processNetChan(listener tunnel.Listener) {
|
||||
// mark the time the message has been received
|
||||
now := time.Now()
|
||||
pbNetConnect := &pbNet.Connect{}
|
||||
|
||||
if err := proto.Unmarshal(m.msg.Body, pbNetConnect); err != nil {
|
||||
log.Debugf("Network tunnel [%s] connect unmarshal error: %v", NetworkChannel, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// don't process your own messages
|
||||
if pbNetConnect.Node.Id == n.options.Id {
|
||||
continue
|
||||
}
|
||||
|
||||
log.Debugf("Network received connect message from: %s", pbNetConnect.Node.Id)
|
||||
|
||||
peer := &node{
|
||||
id: pbNetConnect.Node.Id,
|
||||
address: pbNetConnect.Node.Address,
|
||||
peers: make(map[string]*node),
|
||||
lastSeen: now,
|
||||
}
|
||||
|
||||
// update peer links
|
||||
log.Tracef("Network updating peer link %s for peer: %s", m.session.Link(), pbNetConnect.Node.Address)
|
||||
|
||||
if err := n.updatePeerLinks(pbNetConnect.Node.Address, m.session.Link()); err != nil {
|
||||
log.Debugf("Network failed updating peer links: %s", err)
|
||||
}
|
||||
|
||||
// add peer to the list of node peers
|
||||
if err := n.node.AddPeer(peer); err == ErrPeerExists {
|
||||
log.Debugf("Network peer exists, refreshing: %s", peer.id)
|
||||
@@ -394,50 +420,76 @@ func (n *network) processNetChan(listener tunnel.Listener) {
|
||||
if err := n.RefreshPeer(peer.id, now); err != nil {
|
||||
log.Debugf("Network failed refreshing peer %s: %v", peer.id, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// we send the peer message because someone has sent connect
|
||||
// and wants to know what's on the network. The faster we
|
||||
// respond the faster we start to converge
|
||||
|
||||
// get node peers down to MaxDepth encoded in protobuf
|
||||
msg := PeersToProto(n.node, MaxDepth)
|
||||
node := pbNetConnect.Node.Id
|
||||
|
||||
// advertise yourself to the network
|
||||
if err := n.sendMsg("peer", msg, NetworkChannel); err != nil {
|
||||
if err := n.sendTo("peer", NetworkChannel, node, msg); err != nil {
|
||||
log.Debugf("Network failed to advertise peers: %v", err)
|
||||
}
|
||||
|
||||
// advertise all the routes when a new node has connected
|
||||
if err := n.router.Solicit(); err != nil {
|
||||
log.Debugf("Network failed to solicit routes: %s", err)
|
||||
}
|
||||
|
||||
// specify that we're soliciting
|
||||
select {
|
||||
case n.solicited <- node:
|
||||
default:
|
||||
// don't block
|
||||
}
|
||||
case "peer":
|
||||
// mark the time the message has been received
|
||||
now := time.Now()
|
||||
pbNetPeer := &pbNet.Peer{}
|
||||
|
||||
if err := proto.Unmarshal(m.msg.Body, pbNetPeer); err != nil {
|
||||
log.Debugf("Network tunnel [%s] peer unmarshal error: %v", NetworkChannel, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// don't process your own messages
|
||||
if pbNetPeer.Node.Id == n.options.Id {
|
||||
continue
|
||||
}
|
||||
|
||||
log.Debugf("Network received peer message from: %s %s", pbNetPeer.Node.Id, pbNetPeer.Node.Address)
|
||||
|
||||
peer := &node{
|
||||
id: pbNetPeer.Node.Id,
|
||||
address: pbNetPeer.Node.Address,
|
||||
peers: make(map[string]*node),
|
||||
lastSeen: now,
|
||||
}
|
||||
|
||||
// update peer links
|
||||
log.Tracef("Network updating peer link %s for peer: %s", m.session.Link(), pbNetPeer.Node.Address)
|
||||
|
||||
if err := n.updatePeerLinks(pbNetPeer.Node.Address, m.session.Link()); err != nil {
|
||||
log.Debugf("Network failed updating peer links: %s", err)
|
||||
}
|
||||
|
||||
if err := n.node.AddPeer(peer); err == nil {
|
||||
// send a solicit message when discovering new peer
|
||||
msg := &pbRtr.Solicit{
|
||||
Id: n.options.Id,
|
||||
}
|
||||
if err := n.sendMsg("solicit", msg, ControlChannel); err != nil {
|
||||
|
||||
node := pbNetPeer.Node.Id
|
||||
|
||||
// only solicit this peer
|
||||
if err := n.sendTo("solicit", ControlChannel, node, msg); err != nil {
|
||||
log.Debugf("Network failed to send solicit message: %s", err)
|
||||
}
|
||||
|
||||
continue
|
||||
// we're expecting any error to be ErrPeerExists
|
||||
} else if err != ErrPeerExists {
|
||||
@@ -446,6 +498,7 @@ func (n *network) processNetChan(listener tunnel.Listener) {
|
||||
}
|
||||
|
||||
log.Debugf("Network peer exists, refreshing: %s", pbNetPeer.Node.Id)
|
||||
|
||||
// update lastSeen time for the peer
|
||||
if err := n.RefreshPeer(pbNetPeer.Node.Id, now); err != nil {
|
||||
log.Debugf("Network failed refreshing peer %s: %v", pbNetPeer.Node.Id, err)
|
||||
@@ -458,12 +511,12 @@ func (n *network) processNetChan(listener tunnel.Listener) {
|
||||
log.Debugf("Network failed to update peers: %v", err)
|
||||
}
|
||||
|
||||
// check if we announced and were discovered
|
||||
// tell the connect loop that we've been discovered
|
||||
// so it stops sending connect messages out
|
||||
select {
|
||||
case <-n.announced:
|
||||
// we've sent the connect and received this response
|
||||
case n.discovered <- true:
|
||||
default:
|
||||
close(n.announced)
|
||||
// don't block here
|
||||
}
|
||||
case "close":
|
||||
pbNetClose := &pbNet.Close{}
|
||||
@@ -471,22 +524,28 @@ func (n *network) processNetChan(listener tunnel.Listener) {
|
||||
log.Debugf("Network tunnel [%s] close unmarshal error: %v", NetworkChannel, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// don't process your own messages
|
||||
if pbNetClose.Node.Id == n.options.Id {
|
||||
continue
|
||||
}
|
||||
|
||||
log.Debugf("Network received close message from: %s", pbNetClose.Node.Id)
|
||||
|
||||
peer := &node{
|
||||
id: pbNetClose.Node.Id,
|
||||
address: pbNetClose.Node.Address,
|
||||
}
|
||||
|
||||
if err := n.DeletePeerNode(peer.id); err != nil {
|
||||
log.Debugf("Network failed to delete node %s routes: %v", peer.id, err)
|
||||
}
|
||||
|
||||
if err := n.prunePeerRoutes(peer); err != nil {
|
||||
log.Debugf("Network failed pruning peer %s routes: %v", peer.id, err)
|
||||
}
|
||||
// deelete peer from the peerLinks
|
||||
|
||||
// delete peer from the peerLinks
|
||||
n.Lock()
|
||||
delete(n.peerLinks, pbNetClose.Node.Address)
|
||||
n.Unlock()
|
||||
@@ -497,57 +556,6 @@ func (n *network) processNetChan(listener tunnel.Listener) {
|
||||
}
|
||||
}
|
||||
|
||||
// sendMsg sends a message to the tunnel channel
|
||||
func (n *network) sendMsg(method string, msg proto.Message, channel string) error {
|
||||
body, err := proto.Marshal(msg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// create transport message and chuck it down the pipe
|
||||
m := transport.Message{
|
||||
Header: map[string]string{
|
||||
"Micro-Method": method,
|
||||
},
|
||||
Body: body,
|
||||
}
|
||||
|
||||
// check if the channel client is initialized
|
||||
n.RLock()
|
||||
client, ok := n.tunClient[channel]
|
||||
if !ok || client == nil {
|
||||
n.RUnlock()
|
||||
return ErrClientNotFound
|
||||
}
|
||||
n.RUnlock()
|
||||
|
||||
log.Debugf("Network sending %s message from: %s", method, n.options.Id)
|
||||
if err := client.Send(&m); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// announce announces node peers to the network
|
||||
func (n *network) announce(client transport.Client) {
|
||||
announce := time.NewTicker(AnnounceTime)
|
||||
defer announce.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-n.closed:
|
||||
return
|
||||
case <-announce.C:
|
||||
msg := PeersToProto(n.node, MaxDepth)
|
||||
// advertise yourself to the network
|
||||
if err := n.sendMsg("peer", msg, NetworkChannel); err != nil {
|
||||
log.Debugf("Network failed to advertise peers: %v", err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pruneRoutes prunes routes return by given query
|
||||
func (n *network) pruneRoutes(q ...router.QueryOption) error {
|
||||
routes, err := n.router.Table().Query(q...)
|
||||
@@ -585,9 +593,12 @@ func (n *network) prunePeerRoutes(peer *node) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// prune deltes node peers that have not been seen for longer than PruneTime seconds
|
||||
// prune also removes all the routes either originated by or routable by the stale nodes
|
||||
func (n *network) prune() {
|
||||
// manage the process of announcing to peers and prune any peer nodes that have not been
|
||||
// seen for a period of time. Also removes all the routes either originated by or routable
|
||||
//by the stale nodes
|
||||
func (n *network) manage() {
|
||||
announce := time.NewTicker(AnnounceTime)
|
||||
defer announce.Stop()
|
||||
prune := time.NewTicker(PruneTime)
|
||||
defer prune.Stop()
|
||||
|
||||
@@ -595,8 +606,14 @@ func (n *network) prune() {
|
||||
select {
|
||||
case <-n.closed:
|
||||
return
|
||||
case <-announce.C:
|
||||
msg := PeersToProto(n.node, MaxDepth)
|
||||
// advertise yourself to the network
|
||||
if err := n.sendMsg("peer", NetworkChannel, msg); err != nil {
|
||||
log.Debugf("Network failed to advertise peers: %v", err)
|
||||
}
|
||||
case <-prune.C:
|
||||
pruned := n.PruneStalePeerNodes(PruneTime)
|
||||
pruned := n.PruneStalePeers(PruneTime)
|
||||
for id, peer := range pruned {
|
||||
log.Debugf("Network peer exceeded prune time: %s", id)
|
||||
n.Lock()
|
||||
@@ -606,29 +623,90 @@ func (n *network) prune() {
|
||||
log.Debugf("Network failed pruning peer %s routes: %v", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
// get a list of all routes
|
||||
routes, err := n.options.Router.Table().List()
|
||||
if err != nil {
|
||||
log.Debugf("Network failed listing routes when pruning peers: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// collect all the router IDs in the routing table
|
||||
routers := make(map[string]bool)
|
||||
|
||||
for _, route := range routes {
|
||||
if _, ok := routers[route.Router]; !ok {
|
||||
routers[route.Router] = true
|
||||
// if the router is NOT in our peer graph, delete all routes originated by it
|
||||
if peerNode := n.node.GetPeerNode(route.Router); peerNode == nil {
|
||||
if err := n.pruneRoutes(router.QueryRouter(route.Router)); err != nil {
|
||||
log.Debugf("Network failed deleting routes by %s: %v", route.Router, err)
|
||||
}
|
||||
}
|
||||
// check if its been processed
|
||||
if _, ok := routers[route.Router]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
// mark as processed
|
||||
routers[route.Router] = true
|
||||
|
||||
// if the router is NOT in our peer graph, delete all routes originated by it
|
||||
if peer := n.node.GetPeerNode(route.Router); peer != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := n.pruneRoutes(router.QueryRouter(route.Router)); err != nil {
|
||||
log.Debugf("Network failed deleting routes by %s: %v", route.Router, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sendTo sends a message to a specific node as a one off.
|
||||
// we need this because when links die, we have no discovery info,
|
||||
// and sending to an existing multicast link doesn't immediately work
|
||||
func (n *network) sendTo(method, channel, peer string, msg proto.Message) error {
|
||||
body, err := proto.Marshal(msg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c, err := n.tunnel.Dial(channel, tunnel.DialMode(tunnel.Multicast))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer c.Close()
|
||||
|
||||
log.Debugf("Network sending %s message from: %s to %s", method, n.options.Id, peer)
|
||||
|
||||
return c.Send(&transport.Message{
|
||||
Header: map[string]string{
|
||||
"Micro-Method": method,
|
||||
"Micro-Peer": peer,
|
||||
},
|
||||
Body: body,
|
||||
})
|
||||
}
|
||||
|
||||
// sendMsg sends a message to the tunnel channel
|
||||
func (n *network) sendMsg(method, channel string, msg proto.Message) error {
|
||||
body, err := proto.Marshal(msg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// check if the channel client is initialized
|
||||
n.RLock()
|
||||
client, ok := n.tunClient[channel]
|
||||
if !ok || client == nil {
|
||||
n.RUnlock()
|
||||
return ErrClientNotFound
|
||||
}
|
||||
n.RUnlock()
|
||||
|
||||
log.Debugf("Network sending %s message from: %s", method, n.options.Id)
|
||||
|
||||
return client.Send(&transport.Message{
|
||||
Header: map[string]string{
|
||||
"Micro-Method": method,
|
||||
},
|
||||
Body: body,
|
||||
})
|
||||
}
|
||||
|
||||
// handleCtrlConn handles ControlChannel connections
|
||||
func (n *network) handleCtrlConn(s tunnel.Session, msg chan *message) {
|
||||
for {
|
||||
@@ -642,6 +720,14 @@ func (n *network) handleCtrlConn(s tunnel.Session, msg chan *message) {
|
||||
continue
|
||||
}
|
||||
|
||||
// check if peer is set
|
||||
peer := m.Header["Micro-Peer"]
|
||||
|
||||
// check who the message is intended for
|
||||
if len(peer) > 0 && peer != n.options.Id {
|
||||
continue
|
||||
}
|
||||
|
||||
select {
|
||||
case msg <- &message{
|
||||
msg: m,
|
||||
@@ -770,15 +856,19 @@ func (n *network) processCtrlChan(listener tunnel.Listener) {
|
||||
switch m.msg.Header["Micro-Method"] {
|
||||
case "advert":
|
||||
pbRtrAdvert := &pbRtr.Advert{}
|
||||
|
||||
if err := proto.Unmarshal(m.msg.Body, pbRtrAdvert); err != nil {
|
||||
log.Debugf("Network fail to unmarshal advert message: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// don't process your own messages
|
||||
if pbRtrAdvert.Id == n.options.Id {
|
||||
continue
|
||||
}
|
||||
|
||||
log.Debugf("Network received advert message from: %s", pbRtrAdvert.Id)
|
||||
|
||||
// loookup advertising node in our peer topology
|
||||
advertNode := n.node.GetPeerNode(pbRtrAdvert.Id)
|
||||
if advertNode == nil {
|
||||
@@ -788,6 +878,7 @@ func (n *network) processCtrlChan(listener tunnel.Listener) {
|
||||
}
|
||||
|
||||
var events []*router.Event
|
||||
|
||||
for _, event := range pbRtrAdvert.Events {
|
||||
// we know the advertising node is not the origin of the route
|
||||
if pbRtrAdvert.Id != event.Route.Router {
|
||||
@@ -798,6 +889,7 @@ func (n *network) processCtrlChan(listener tunnel.Listener) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
route := router.Route{
|
||||
Service: event.Route.Service,
|
||||
Address: event.Route.Address,
|
||||
@@ -807,6 +899,7 @@ func (n *network) processCtrlChan(listener tunnel.Listener) {
|
||||
Link: event.Route.Link,
|
||||
Metric: event.Route.Metric,
|
||||
}
|
||||
|
||||
// calculate route metric and add to the advertised metric
|
||||
// we need to make sure we do not overflow math.MaxInt64
|
||||
metric := n.getRouteMetric(event.Route.Router, event.Route.Gateway, event.Route.Link)
|
||||
@@ -829,11 +922,13 @@ func (n *network) processCtrlChan(listener tunnel.Listener) {
|
||||
}
|
||||
events = append(events, e)
|
||||
}
|
||||
|
||||
// if no events are eligible for processing continue
|
||||
if len(events) == 0 {
|
||||
log.Tracef("Network no events to be processed by router: %s", n.options.Id)
|
||||
continue
|
||||
}
|
||||
|
||||
// create an advert and process it
|
||||
advert := &router.Advert{
|
||||
Id: pbRtrAdvert.Id,
|
||||
@@ -853,16 +948,27 @@ func (n *network) processCtrlChan(listener tunnel.Listener) {
|
||||
log.Debugf("Network fail to unmarshal solicit message: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
log.Debugf("Network received solicit message from: %s", pbRtrSolicit.Id)
|
||||
|
||||
// ignore solicitation when requested by you
|
||||
if pbRtrSolicit.Id == n.options.Id {
|
||||
continue
|
||||
}
|
||||
|
||||
log.Debugf("Network router flushing routes for: %s", pbRtrSolicit.Id)
|
||||
|
||||
// advertise all the routes when a new node has connected
|
||||
if err := n.router.Solicit(); err != nil {
|
||||
log.Debugf("Network failed to solicit routes: %s", err)
|
||||
}
|
||||
|
||||
// specify that someone solicited the route
|
||||
select {
|
||||
case n.solicited <- pbRtrSolicit.Id:
|
||||
default:
|
||||
// don't block
|
||||
}
|
||||
}
|
||||
case <-n.closed:
|
||||
return
|
||||
@@ -879,6 +985,7 @@ func (n *network) advertise(advertChan <-chan *router.Advert) {
|
||||
case advert := <-advertChan:
|
||||
// create a proto advert
|
||||
var events []*pbRtr.Event
|
||||
|
||||
for _, event := range advert.Events {
|
||||
// the routes service address
|
||||
address := event.Route.Address
|
||||
@@ -912,16 +1019,33 @@ func (n *network) advertise(advertChan <-chan *router.Advert) {
|
||||
}
|
||||
events = append(events, e)
|
||||
}
|
||||
|
||||
msg := &pbRtr.Advert{
|
||||
Id: advert.Id,
|
||||
Type: pbRtr.AdvertType(advert.Type),
|
||||
Timestamp: advert.Timestamp.UnixNano(),
|
||||
Events: events,
|
||||
}
|
||||
if err := n.sendMsg("advert", msg, ControlChannel); err != nil {
|
||||
log.Debugf("Network failed to advertise routes: %v", err)
|
||||
|
||||
// send the advert to all on the control channel
|
||||
// since its not a solicitation
|
||||
if advert.Type != router.Solicitation {
|
||||
if err := n.sendMsg("advert", ControlChannel, msg); err != nil {
|
||||
log.Debugf("Network failed to advertise routes: %v", err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// it's a solication, someone asked for it
|
||||
// so we're going to pick off the node and send it
|
||||
select {
|
||||
case node := <-n.solicited:
|
||||
// someone requested the route
|
||||
n.sendTo("advert", ControlChannel, node, msg)
|
||||
default:
|
||||
// send to all since we can't get anything
|
||||
n.sendMsg("advert", ControlChannel, msg)
|
||||
}
|
||||
case <-n.closed:
|
||||
return
|
||||
}
|
||||
@@ -939,41 +1063,87 @@ func (n *network) sendConnect() {
|
||||
Address: n.node.address,
|
||||
},
|
||||
}
|
||||
if err := n.sendMsg("connect", msg, NetworkChannel); err != nil {
|
||||
|
||||
if err := n.sendMsg("connect", NetworkChannel, msg); err != nil {
|
||||
log.Debugf("Network failed to send connect message: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
// connect will wait for a link to be established
|
||||
// connect will wait for a link to be established and send the connect
|
||||
// message. We're trying to ensure convergence pretty quickly. So we want
|
||||
// to hear back. In the case we become completely disconnected we'll
|
||||
// connect again once a new link is established
|
||||
func (n *network) connect() {
|
||||
// wait for connected state
|
||||
var connected bool
|
||||
// discovered lets us know what we received a peer message back
|
||||
var discovered bool
|
||||
var attempts int
|
||||
|
||||
// our advertise address
|
||||
loopback := n.server.Options().Advertise
|
||||
// actual address
|
||||
address := n.tunnel.Address()
|
||||
|
||||
for {
|
||||
// check the links
|
||||
// connected is used to define if the link is connected
|
||||
var connected bool
|
||||
|
||||
// check the links state
|
||||
for _, link := range n.tunnel.Links() {
|
||||
// skip loopback
|
||||
if link.Loopback() {
|
||||
continue
|
||||
}
|
||||
|
||||
// if remote is ourselves
|
||||
switch link.Remote() {
|
||||
case loopback, address:
|
||||
continue
|
||||
}
|
||||
|
||||
if link.State() == "connected" {
|
||||
connected = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// if we're not conencted wait
|
||||
// if we're not connected wait
|
||||
if !connected {
|
||||
// reset discovered
|
||||
discovered = false
|
||||
// sleep for a second
|
||||
time.Sleep(time.Second)
|
||||
// now try again
|
||||
continue
|
||||
}
|
||||
|
||||
// send the connect message
|
||||
n.sendConnect()
|
||||
// we're connected but are we discovered?
|
||||
if !discovered {
|
||||
// recreate the clients because all the tunnel links are gone
|
||||
// so we haven't send discovery beneath
|
||||
if err := n.createClients(); err != nil {
|
||||
log.Debugf("Failed to recreate network/control clients: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// check the announce channel
|
||||
// send the connect message
|
||||
n.sendConnect()
|
||||
}
|
||||
|
||||
// check if we've been discovered
|
||||
select {
|
||||
case <-n.announced:
|
||||
case <-n.discovered:
|
||||
discovered = true
|
||||
attempts = 0
|
||||
case <-n.closed:
|
||||
return
|
||||
default:
|
||||
time.Sleep(time.Second)
|
||||
// we have to go again
|
||||
case <-time.After(time.Second + backoff.Do(attempts)):
|
||||
// we have to try again
|
||||
attempts++
|
||||
|
||||
// reset attempts 5 == ~2mins
|
||||
if attempts > 5 {
|
||||
attempts = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -982,18 +1152,18 @@ func (n *network) connect() {
|
||||
func (n *network) Connect() error {
|
||||
n.Lock()
|
||||
|
||||
// try to resolve network nodes
|
||||
nodes, err := n.resolveNodes()
|
||||
if err != nil {
|
||||
log.Debugf("Network failed to resolve nodes: %v", err)
|
||||
}
|
||||
|
||||
// connect network tunnel
|
||||
if err := n.tunnel.Connect(); err != nil {
|
||||
n.Unlock()
|
||||
return err
|
||||
}
|
||||
|
||||
// try to resolve network nodes
|
||||
nodes, err := n.resolveNodes()
|
||||
if err != nil {
|
||||
log.Debugf("Network failed to resolve nodes: %v", err)
|
||||
}
|
||||
|
||||
// initialize the tunnel to resolved nodes
|
||||
n.tunnel.Init(
|
||||
tunnel.Nodes(nodes...),
|
||||
@@ -1048,8 +1218,6 @@ func (n *network) Connect() error {
|
||||
|
||||
// create closed channel
|
||||
n.closed = make(chan bool)
|
||||
// create new announced channel
|
||||
n.announced = make(chan bool)
|
||||
|
||||
// start the router
|
||||
if err := n.options.Router.Start(); err != nil {
|
||||
@@ -1073,16 +1241,14 @@ func (n *network) Connect() error {
|
||||
|
||||
// send connect after there's a link established
|
||||
go n.connect()
|
||||
// go resolving network nodes
|
||||
// resolve network nodes and re-init the tunnel
|
||||
go n.resolve()
|
||||
// broadcast peers
|
||||
go n.announce(netClient)
|
||||
// prune stale nodes
|
||||
go n.prune()
|
||||
// listen to network messages
|
||||
go n.processNetChan(netListener)
|
||||
// broadcast announcements and prune stale nodes
|
||||
go n.manage()
|
||||
// advertise service routes
|
||||
go n.advertise(advertChan)
|
||||
// listen to network messages
|
||||
go n.processNetChan(netListener)
|
||||
// accept and process routes
|
||||
go n.processCtrlChan(ctrlListener)
|
||||
|
||||
@@ -1112,6 +1278,40 @@ func (n *network) close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// createClients is used to create new clients in the event we lose all the tunnels
|
||||
func (n *network) createClients() error {
|
||||
// dial into ControlChannel to send route adverts
|
||||
ctrlClient, err := n.tunnel.Dial(ControlChannel, tunnel.DialMode(tunnel.Multicast))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// dial into NetworkChannel to send network messages
|
||||
netClient, err := n.tunnel.Dial(NetworkChannel, tunnel.DialMode(tunnel.Multicast))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
n.Lock()
|
||||
defer n.Unlock()
|
||||
|
||||
// set the control client
|
||||
c, ok := n.tunClient[ControlChannel]
|
||||
if ok {
|
||||
c.Close()
|
||||
}
|
||||
n.tunClient[ControlChannel] = ctrlClient
|
||||
|
||||
// set the network client
|
||||
c, ok = n.tunClient[NetworkChannel]
|
||||
if ok {
|
||||
c.Close()
|
||||
}
|
||||
n.tunClient[NetworkChannel] = netClient
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close closes network connection
|
||||
func (n *network) Close() error {
|
||||
n.Lock()
|
||||
@@ -1140,7 +1340,7 @@ func (n *network) Close() error {
|
||||
Address: n.node.address,
|
||||
},
|
||||
}
|
||||
if err := n.sendMsg("close", msg, NetworkChannel); err != nil {
|
||||
if err := n.sendMsg("close", NetworkChannel, msg); err != nil {
|
||||
log.Debugf("Network failed to send close message: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user