plugin/proxy: kick off HC on every 3rd failure (#1110)
* healthchecks: check on every 3rd failure

Check on every third failure, with some cleanups to make this possible. A failed healthcheck never increases Fails; a successful healthcheck resets Fails to 0. There is a chance the counter now drops below 0, making the upstream "super" healthy. This removes the okUntil smartness and condenses everything back to one metric, Fails, so it is simpler in that regard. Timeout errors are *not* attributed to the local upstream and no longer count towards Fails, meaning a 'dig any isc.org' won't kill your upstream. Added an extra test to see if the Fails counter gets reset after 3 failed connections. There is still a disconnect between the HTTP healthcheck working while the proxy (or lookup) is unable to connect to the upstream.

* Fix tests
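For illustration only, not the actual plugin/proxy code: a minimal sketch of the failure-counting behaviour described above, using a hypothetical host type with an atomic Fails counter. Every third observed (non-timeout) failure kicks off a health check in its own goroutine; a successful check resets Fails to 0, and a failed check never increases it.

// hcsketch is a hypothetical, self-contained model of the behaviour
// described in the commit message; it is not taken from plugin/proxy.
package main

import (
	"fmt"
	"sync/atomic"
)

// host stands in for an upstream host; only the Fails counter is modeled.
type host struct {
	Fails int32
}

// check is a placeholder for the real HTTP health check.
func (h *host) check() bool { return true }

// observeFailure records a regular (non-timeout) upstream failure and,
// on every third failure, kicks off a health check out of band.
func (h *host) observeFailure() {
	if atomic.AddInt32(&h.Fails, 1)%3 == 0 {
		go h.runCheck()
	}
}

// runCheck resets Fails to 0 on a successful check; a failed check
// leaves the counter untouched (it never increases Fails).
func (h *host) runCheck() {
	if h.check() {
		atomic.StoreInt32(&h.Fails, 0)
	}
}

func main() {
	h := &host{}
	for i := 0; i < 3; i++ {
		h.observeFailure() // the third failure triggers a check
	}
	fmt.Println("fails right after the third failure:", atomic.LoadInt32(&h.Fails))
}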
parent c7ff44fb3a
commit e34e2c251f

13 changed files with 180 additions and 190 deletions
@@ -15,29 +15,16 @@ import (
 func TestStop(t *testing.T) {
 	config := "proxy . %s {\n health_check /healthcheck:%s %dms \n}"
 	tests := []struct {
-		name                    string
 		intervalInMilliseconds  int
 		numHealthcheckIntervals int
 	}{
-		{
-			"No Healthchecks After Stop - 5ms, 1 intervals",
-			5,
-			1,
-		},
-		{
-			"No Healthchecks After Stop - 5ms, 2 intervals",
-			5,
-			2,
-		},
-		{
-			"No Healthchecks After Stop - 5ms, 3 intervals",
-			5,
-			3,
-		},
+		{5, 1},
+		{5, 2},
+		{5, 3},
 	}
 
-	for _, test := range tests {
-		t.Run(test.name, func(t *testing.T) {
+	for i, test := range tests {
+		t.Run(fmt.Sprintf("Test %d", i), func(t *testing.T) {
 
 			// Set up proxy.
 			var counter int64
@@ -53,7 +40,7 @@ func TestStop(t *testing.T) {
 			c := caddyfile.NewDispenser("Testfile", strings.NewReader(fmt.Sprintf(config, back, port, test.intervalInMilliseconds)))
 			upstreams, err := NewStaticUpstreams(&c)
 			if err != nil {
-				t.Error("Expected no error. Got:", err.Error())
+				t.Errorf("Test %d, expected no error. Got: %s", i, err)
 			}
 
 			// Give some time for healthchecks to hit the server.
@@ -61,27 +48,25 @@ func TestStop(t *testing.T) {
 
 			for _, upstream := range upstreams {
 				if err := upstream.Stop(); err != nil {
-					t.Error("Expected no error stopping upstream. Got: ", err.Error())
+					t.Errorf("Test %d, expected no error stopping upstream, got: %s", i, err)
 				}
 			}
 
-			counterValueAfterShutdown := atomic.LoadInt64(&counter)
+			counterAfterShutdown := atomic.LoadInt64(&counter)
 
 			// Give some time to see if healthchecks are still hitting the server.
 			time.Sleep(time.Duration(test.intervalInMilliseconds*test.numHealthcheckIntervals) * time.Millisecond)
 
-			if counterValueAfterShutdown == 0 {
-				t.Error("Expected healthchecks to hit test server. Got no healthchecks.")
+			if counterAfterShutdown == 0 {
+				t.Errorf("Test %d, Expected healthchecks to hit test server, got none", i)
 			}
 
 			// health checks are in a go routine now, so one may well occur after we shutdown,
 			// but we only ever expect one more
-			counterValueAfterWaiting := atomic.LoadInt64(&counter)
-			if counterValueAfterWaiting > (counterValueAfterShutdown + 1) {
-				t.Errorf("Expected no more healthchecks after shutdown. Got: %d healthchecks after shutdown", counterValueAfterWaiting-counterValueAfterShutdown)
+			counterAfterWaiting := atomic.LoadInt64(&counter)
+			if counterAfterWaiting > (counterAfterShutdown + 1) {
+				t.Errorf("Test %d, expected no more healthchecks after shutdown. got: %d healthchecks after shutdown", i, counterAfterWaiting-counterAfterShutdown)
			}
-
 		})
-
 	}
 }
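The hunks above rely on a test backend (back, port) and an atomic counter whose setup lies outside the shown context. A minimal sketch of that pattern, with names and wiring assumed rather than copied from the test file:

// Hypothetical standalone sketch of the healthcheck-counting pattern the
// test relies on; not the actual upstream test setup.
package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"sync/atomic"
)

func main() {
	var counter int64 // incremented once per health check the backend sees

	backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/healthcheck" {
			atomic.AddInt64(&counter, 1)
		}
		w.WriteHeader(http.StatusOK)
	}))
	defer backend.Close()

	// A real test would point the proxy's health_check at backend.URL, wait a
	// few intervals, and then read the counter with atomic.LoadInt64(&counter).
	if resp, err := http.Get(backend.URL + "/healthcheck"); err == nil {
		resp.Body.Close()
	}
	fmt.Println("healthchecks seen:", atomic.LoadInt64(&counter))
}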