Use exponential backoff when fetching user-data from a URL.

The user-cloudinit-proc-cmdline systemd unit is responsible for
fetching user-data from various sources during the cloud-init
process. When fetching user-data from a URL datasource, we face
a race condition: the network may not be available yet, which
can cause the job to fail, after which no further attempts to
fetch the user-data are made.

Eliminate the race condition when fetching user-data from a URL
datasource. Retry the fetch using an exponential backoff until
the user-data is retrieved.

Fixes issue 105.
This commit is contained in:
Kelsey Hightower 2014-05-14 22:20:40 -07:00
parent b505e6241c
commit 31f61d7531
2 changed files with 68 additions and 7 deletions

View File

@ -2,7 +2,10 @@ package datasource
import ( import (
"io/ioutil" "io/ioutil"
"log"
"math"
"net/http" "net/http"
"time"
) )
type Datasource interface { type Datasource interface {
@ -11,17 +14,12 @@ type Datasource interface {
} }
func fetchURL(url string) ([]byte, error) { func fetchURL(url string) ([]byte, error) {
client := http.Client{} resp, err := getWithExponentialBackoff(url)
resp, err := client.Get(url)
if err != nil { if err != nil {
return []byte{}, err return []byte{}, err
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode / 100 != 2 {
return []byte{}, nil
}
respBytes, err := ioutil.ReadAll(resp.Body) respBytes, err := ioutil.ReadAll(resp.Body)
if err != nil { if err != nil {
return nil, err return nil, err
@ -29,3 +27,21 @@ func fetchURL(url string) ([]byte, error) {
return respBytes, nil return respBytes, nil
} }
// getWithExponentialBackoff issues a GET to the specified URL and keeps
// retrying until it receives a 2xx response. A transport error or a non-2xx
// status triggers a retry after a delay that starts at 100ms and doubles on
// every attempt, up to a ceiling of 30 seconds.
//
// The function only returns on success, so the returned error is always nil;
// the error result is kept so the signature mirrors http.Get. The caller owns
// the returned response and must close its Body.
func getWithExponentialBackoff(url string) (*http.Response, error) {
	const (
		baseDelayMs = 100   // delay before the first retry, in milliseconds
		maxDelayMs  = 30000 // upper bound for any single delay, in milliseconds
	)
	for i := 0; ; i++ {
		resp, err := http.Get(url)
		if err == nil {
			if resp.StatusCode/100 == 2 {
				return resp, nil
			}
			// A rejected response still holds a connection; release it
			// before sleeping so failed attempts do not accumulate.
			resp.Body.Close()
		}
		// Clamp in floating point: math.Pow grows without bound (eventually
		// +Inf), and converting such a value to a Duration would overflow.
		delayMs := math.Min(math.Pow(2, float64(i))*baseDelayMs, maxDelayMs)
		duration := time.Duration(delayMs) * time.Millisecond
		log.Printf("unable to fetch user-data from %s, try again in %s", url, duration)
		time.Sleep(duration)
	}
}

View File

@ -0,0 +1,45 @@
package datasource
import (
"fmt"
"io"
"net/http"
"net/http/httptest"
"testing"
)
// expBackoffTests is the table driving TestGetWithExponentialBackoff. Each
// entry gives the number of failing responses the test server sends before it
// succeeds (count) and the body expected from the successful response (body).
var expBackoffTests = []struct {
	count int
	body  string
}{
	{count: 0, body: "number of attempts: 0"},
	{count: 1, body: "number of attempts: 1"},
	{count: 2, body: "number of attempts: 2"},
}
// TestGetWithExponentialBackoff runs fetchURL against a local test server
// that answers with a 500 for the first tt.count requests and then succeeds.
// It checks that fetchURL retried exactly tt.count times and returned the
// body of the successful response.
func TestGetWithExponentialBackoff(t *testing.T) {
	for i, tt := range expBackoffTests {
		mux := http.NewServeMux()
		// count is the number of failing responses served so far.
		count := 0
		mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
			if count == tt.count {
				io.WriteString(w, fmt.Sprintf("number of attempts: %d", count))
				return
			}
			count++
			http.Error(w, "", 500)
		})
		ts := httptest.NewServer(mux)
		data, err := fetchURL(ts.URL)
		// Close the server at the end of this iteration. A defer here would
		// only run when the whole test returns, keeping every server alive.
		ts.Close()
		if err != nil {
			t.Errorf("Test case %d produced error: %v", i, err)
		}
		if count != tt.count {
			t.Errorf("Test case %d failed: %d != %d", i, count, tt.count)
		}
		if string(data) != tt.body {
			t.Errorf("Test case %d failed: %s != %s", i, tt.body, data)
		}
	}
}