plugin/metrics: Add a metric to monitor which plugin(s) is(are) enabled (#2700)

* Add a GaugeVec for enabled plugins monitoring.

Signed-off-by: Jiacheng Xu <xjcmaxwellcjx@gmail.com>

* Add server label and zone label for enable_plugin metric.

* Add a test for PluginEnabled metric

* Add description for enabledPlugin metric.

* Change the description for the enabledPlugin metric.

* Reset the enabledPlugin metric when restarting the server.

* Add the bug section for enabledPlugin metric.

* Remove the resolveTCPAddr
This commit is contained in:
Jiacheng Xu 2019-03-23 10:43:15 +01:00 committed by Miek Gieben
parent a3dd8cdf8d
commit 0e137b23f1
5 changed files with 63 additions and 0 deletions

View file

@ -19,6 +19,7 @@ The following metrics are exported:
* `coredns_dns_request_type_count_total{server, zone, type}` - counter of queries per zone and type.
* `coredns_dns_response_size_bytes{server, zone, proto}` - response size in bytes.
* `coredns_dns_response_rcode_count_total{server, zone, rcode}` - response per zone and rcode.
* `coredns_plugin_enabled{server, zone, name}` - indicates whether a plugin is enabled on a per-server and per-zone basis.
Each counter has a label `zone` which is the zonename used for the request/response.
@ -75,3 +76,4 @@ When reloading, the Prometheus handler is stopped before the new server instance
If that new server fails to start, then the initial server instance is still available and DNS queries still served,
but Prometheus handler stays down.
Prometheus will not reply HTTP request until a successful reload or a complete restart of CoreDNS.
Only plugins that register as a Handler are visible in `coredns_plugin_enabled{server, zone, name}`. As of today, the reload and bind plugins are not reported.

View file

@ -51,6 +51,7 @@ func New(addr string) *Metrics {
met.MustRegister(vars.RequestType)
met.MustRegister(vars.ResponseSize)
met.MustRegister(vars.ResponseRcode)
met.MustRegister(vars.PluginEnabled)
return met
}

View file

@ -7,6 +7,7 @@ import (
"github.com/coredns/coredns/core/dnsserver"
"github.com/coredns/coredns/coremain"
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/metrics/vars"
clog "github.com/coredns/coredns/plugin/pkg/log"
"github.com/coredns/coredns/plugin/pkg/uniq"
@ -50,6 +51,23 @@ func setup(c *caddy.Controller) error {
return nil
})
c.OnRestart(func() error {
vars.PluginEnabled.Reset()
return nil
})
c.OnStartup(func() error {
conf := dnsserver.GetConfig(c)
plugins := conf.Handlers()
for _, h := range conf.ListenHosts {
addrstr := conf.Transport + "://" + net.JoinHostPort(h, conf.Port)
for _, p := range plugins {
vars.PluginEnabled.WithLabelValues(addrstr, conf.Zone, p.Name()).Set(1)
}
}
return nil
})
c.OnRestart(m.OnRestart)
c.OnFinalShutdown(m.OnFinalShutdown)

View file

@ -65,6 +65,12 @@ var (
Name: "panic_count_total",
Help: "A metrics that counts the number of panics.",
})
PluginEnabled = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: plugin.Namespace,
Name: "plugin_enabled",
Help: "A metric that indicates whether a plugin is enabled on per server and zone basis.",
}, []string{"server", "zone", "name"})
)
const (

View file

@ -186,3 +186,39 @@ google.com:0 {
t.Errorf("Expected metric data retrieved for %s, expected %d, got %d", cacheSizeMetricName, 1, endCacheSize-beginCacheSize)
}
}
// TestMetricsPluginEnabled starts a server from a two-zone Corefile, scrapes
// the metrics endpoint and checks the coredns_plugin_enabled metric: a plugin
// named in the Corefile (chaos) must yield the value "1", while a plugin that
// is not named there (whoami) must yield no value at all.
func TestMetricsPluginEnabled(t *testing.T) {
	const metricName = "coredns_plugin_enabled" // labels: {server, zone, name}

	instance, err := CoreDNSServer(`example.org:0 {
chaos CoreDNS-001 miek@miek.nl
prometheus localhost:0
}
example.com:0 {
forward . 8.8.4.4:53
prometheus localhost:0
}
`)
	if err != nil {
		t.Fatalf("Could not get CoreDNS serving instance: %s", err)
	}
	defer instance.Stop()

	scraped := test.Scrape("http://" + metrics.ListenAddr + "/metrics")

	// Each case looks up the metric sample that has a label value equal to
	// plugin and compares the sample's value against want.
	cases := []struct {
		plugin string
		want   string
	}{
		{plugin: "chaos", want: "1"}, // present in the example.org block
		{plugin: "whoami", want: ""}, // absent from both server blocks
	}
	for _, tc := range cases {
		value, _ := test.MetricValueLabel(metricName, tc.plugin, scraped)
		if value != tc.want {
			t.Errorf("Expected value %s for %s, but got %s", tc.want, metricName, value)
		}
	}
}