Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions discovery/http/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
Expand All @@ -45,10 +46,17 @@ var (
}
userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`)

failuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_http_failures_total",
Help: "Number of HTTP service discovery refresh failures.",
})
)

// init registers the HTTP service discovery configuration type with the
// discovery package and registers the failuresCount counter with the
// Prometheus client library so refresh failures are exported as a metric.
func init() {
discovery.RegisterConfig(&SDConfig{})
// NOTE(review): MustRegister panics when registration fails (for example on
// a duplicate collector), so this must only ever run once per process.
prometheus.MustRegister(failuresCount)
}

// SDConfig is the configuration for HTTP based discovery.
Expand Down Expand Up @@ -145,6 +153,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {

resp, err := d.client.Do(req.WithContext(ctx))
if err != nil {
failuresCount.Inc()
return nil, err
}
defer func() {
Expand All @@ -153,26 +162,31 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
}()

if resp.StatusCode != http.StatusOK {
failuresCount.Inc()
return nil, errors.Errorf("server returned HTTP status %s", resp.Status)
}

if !matchContentType.MatchString(strings.TrimSpace(resp.Header.Get("Content-Type"))) {
failuresCount.Inc()
return nil, errors.Errorf("unsupported content type %q", resp.Header.Get("Content-Type"))
}

b, err := ioutil.ReadAll(resp.Body)
if err != nil {
failuresCount.Inc()
return nil, err
}

var targetGroups []*targetgroup.Group

if err := json.Unmarshal(b, &targetGroups); err != nil {
failuresCount.Inc()
return nil, err
}

for i, tg := range targetGroups {
if tg == nil {
failuresCount.Inc()
err = errors.New("nil target group item found")
return nil, err
}
Expand Down
30 changes: 30 additions & 0 deletions discovery/http/http_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"time"

"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -61,6 +63,7 @@ func TestHTTPValidRefresh(t *testing.T) {
},
}
require.Equal(t, tgs, expectedTargets)
require.Equal(t, 0.0, getFailureCount())
}

func TestHTTPInvalidCode(t *testing.T) {
Expand All @@ -82,6 +85,7 @@ func TestHTTPInvalidCode(t *testing.T) {
ctx := context.Background()
_, err = d.refresh(ctx)
require.EqualError(t, err, "server returned HTTP status 400 Bad Request")
require.Equal(t, 1.0, getFailureCount())
}

func TestHTTPInvalidFormat(t *testing.T) {
Expand All @@ -103,6 +107,32 @@ func TestHTTPInvalidFormat(t *testing.T) {
ctx := context.Background()
_, err = d.refresh(ctx)
require.EqualError(t, err, `unsupported content type "text/plain; charset=utf-8"`)
require.Equal(t, 1.0, getFailureCount())
}

// lastFailureCount holds the cumulative counter value observed by the most
// recent getFailureCount call, so the next call can report only the delta.
var lastFailureCount float64

// getFailureCount returns how many failures failuresCount has recorded since
// the previous call (or since the counter was created, on the first call).
//
// failuresCount is a single package-level counter, so its absolute value
// includes failures triggered by earlier tests; reporting the delta lets each
// test assert only on its own refresh. The bookkeeping lives in the
// package-level lastFailureCount variable, so callers are expected to invoke
// this once per refresh and not concurrently.
//
// It panics if the collected metric cannot be written out, because silently
// ignoring that error would produce a misleading count.
func getFailureCount() float64 {
	failureChan := make(chan prometheus.Metric)

	// The channel is unbuffered, so Collect runs in its own goroutine while
	// this function receives; closing the channel ends the range loop below.
	go func() {
		failuresCount.Collect(failureChan)
		close(failureChan)
	}()

	var counter dto.Metric
	for metric := range failureChan {
		if err := metric.Write(&counter); err != nil {
			panic(err)
		}
	}

	// The generated getters are nil-safe, so a metric without a counter value
	// reads as 0 instead of dereferencing a nil pointer.
	total := counter.GetCounter().GetValue()

	// Account for failures in prior tests.
	count := total - lastFailureCount
	lastFailureCount = total
	return count
}

func TestContentTypeRegex(t *testing.T) {
Expand Down
5 changes: 3 additions & 2 deletions docs/configuration/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -1448,8 +1448,9 @@ Example response body:
]
```

The endpoint is queried periodically at the specified
refresh interval.
The endpoint is queried periodically at the specified refresh interval.
The `prometheus_sd_http_failures_total` counter metric tracks the number of
refresh failures.

Each target has a meta label `__meta_url` during the
[relabeling phase](#relabel_config). Its value is set to the
Expand Down
3 changes: 2 additions & 1 deletion docs/http_sd.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ an empty list `[]`. Target lists are unordered.

Prometheus caches target lists. If an error occurs while fetching an updated
targets list, Prometheus keeps using the current targets list. The targets list
is not saved across restart.
is not saved across restarts. The `prometheus_sd_http_failures_total` counter
metric tracks the number of refresh failures.

The whole list of targets must be returned on every scrape. There is no support
for incremental updates. A Prometheus instance does not send its hostname and it
Expand Down