ir: Add health status reporting on reconfiguration #1311
9 changed files with 37 additions and 6 deletions
|
@ -7,6 +7,7 @@ import (
|
|||
|
||||
configViper "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/internal/common/config"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
|
||||
control "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/control/ir"
|
||||
"github.com/spf13/viper"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
@ -81,6 +82,10 @@ func watchForSignal(cancel func()) {
|
|||
return
|
||||
case <-sighupCh:
|
||||
log.Info(logs.FrostFSNodeSIGHUPHasBeenReceivedRereadingConfiguration)
|
||||
if !innerRing.CompareAndSwapHealthStatus(control.HealthStatus_READY, control.HealthStatus_RECONFIGURING) {
|
||||
log.Info(logs.FrostFSNodeSIGHUPSkip)
|
||||
break
|
||||
}
|
||||
err := reloadConfig()
|
||||
if err != nil {
|
||||
log.Error(logs.FrostFSNodeConfigurationReading, zap.Error(err))
|
||||
|
@ -92,6 +97,7 @@ func watchForSignal(cancel func()) {
|
|||
if err != nil {
|
||||
log.Error(logs.FrostFSNodeConfigurationReading, zap.Error(err))
|
||||
}
|
||||
innerRing.CompareAndSwapHealthStatus(control.HealthStatus_RECONFIGURING, control.HealthStatus_READY)
|
||||
log.Info(logs.FrostFSNodeConfigurationHasBeenReloadedSuccessfully)
|
||||
}
|
||||
}
|
||||
|
|
6
go.mod
6
go.mod
|
@ -59,7 +59,7 @@ require (
|
|||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/bits-and-blooms/bitset v1.13.0 // indirect
|
||||
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/consensys/bavard v0.1.13 // indirect
|
||||
github.com/consensys/gnark-crypto v0.12.2-0.20231222162921-eb75782795d2 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
|
||||
|
@ -120,8 +120,8 @@ require (
|
|||
golang.org/x/crypto v0.21.0 // indirect
|
||||
golang.org/x/net v0.23.0 // indirect
|
||||
golang.org/x/text v0.16.0 // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240311132316-a219d84964c2 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240314234333-6e1732d8331c // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 // indirect
|
||||
gopkg.in/ini.v1 v1.67.0 // indirect
|
||||
lukechampine.com/blake3 v1.2.1 // indirect
|
||||
rsc.io/tmplfunc v0.0.3 // indirect
|
||||
|
|
BIN
go.sum
BIN
go.sum
Binary file not shown.
|
@ -161,6 +161,16 @@ func (s *Server) setHealthStatus(hs control.HealthStatus) {
|
|||
}
|
||||
}
|
||||
|
||||
func (s *Server) CompareAndSwapHealthStatus(oldSt, newSt control.HealthStatus) (swapped bool) {
|
||||
if swapped = s.healthStatus.CompareAndSwap(int32(oldSt), int32(newSt)); swapped {
|
||||
s.notifySystemd(newSt)
|
||||
if s.irMetrics != nil {
|
||||
s.irMetrics.SetHealth(int32(newSt))
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// HealthStatus returns the current health status of the IR application.
|
||||
func (s *Server) HealthStatus() control.HealthStatus {
|
||||
return control.HealthStatus(s.healthStatus.Load())
|
||||
|
@ -186,6 +196,8 @@ func (s *Server) notifySystemd(st control.HealthStatus) {
|
|||
err = sdnotify.FlagAndStatus(sdnotify.ReadyEnabled)
|
||||
case control.HealthStatus_SHUTTING_DOWN:
|
||||
err = sdnotify.FlagAndStatus(sdnotify.StoppingEnabled)
|
||||
case control.HealthStatus_RECONFIGURING:
|
||||
err = sdnotify.FlagAndStatus(sdnotify.ReloadingEnabled)
|
||||
default:
|
||||
err = sdnotify.Status(fmt.Sprintf("%v", st))
|
||||
}
|
||||
|
|
BIN
pkg/services/control/ir/types.pb.go
generated
BIN
pkg/services/control/ir/types.pb.go
generated
Binary file not shown.
|
@ -26,4 +26,7 @@ enum HealthStatus {
|
|||
|
||||
// IR application is shutting down.
|
||||
SHUTTING_DOWN = 3;
|
||||
|
||||
// IR application is reconfiguring.
|
||||
RECONFIGURING = 4;
|
||||
}
|
||||
|
|
10
pkg/util/sdnotify/clock.go
Normal file
10
pkg/util/sdnotify/clock.go
Normal file
|
@ -0,0 +1,10 @@
|
|||
package sdnotify
|
||||
|
||||
import (
|
||||
// For go:linkname to work.
|
||||
_ "unsafe"
|
||||
)
|
||||
|
||||
// nanotime has no Go body in this file: the go:linkname directive below binds
// it to runtime.nanotime, and the blank "unsafe" import above is what allows
// that directive to be used.
// NOTE(review): presumably this is the runtime's monotonic clock reading in
// nanoseconds (the caller divides it by 1e3 to get microseconds) — confirm.
//go:noescape
|
||||
//go:linkname nanotime runtime.nanotime
|
||||
func nanotime() int64
|
2
pkg/util/sdnotify/clock.s
Normal file
2
pkg/util/sdnotify/clock.s
Normal file
|
@ -0,0 +1,2 @@
|
|||
// The file is intentionally empty.
|
||||
// It is a workaround for https://github.com/golang/go/issues/15006
|
|
@ -6,7 +6,6 @@ import (
|
|||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -17,7 +16,6 @@ const (
|
|||
|
||||
var (
|
||||
socket *net.UnixAddr
|
||||
start = time.Now()
|
||||
|
||||
errSocketVariableIsNotPresent = errors.New("\"NOTIFY_SOCKET\" environment variable is not present")
|
||||
errSocketIsNotInitialized = errors.New("socket is not initialized")
|
||||
|
@ -53,7 +51,7 @@ func FlagAndStatus(status string) error {
|
|||
// must be sent, containing "READY=1".
|
||||
//
|
||||
// For MONOTONIC_USEC format refer to https://www.man7.org/linux/man-pages/man3/sd_notify.3.html
|
||||
fyrchik marked this conversation as resolved
|
||||
status += fmt.Sprintf("\nMONOTONIC_USEC=%d", uint64(time.Since(start))/1e3 /* microseconds in nanoseconds */)
|
||||
status += fmt.Sprintf("\nMONOTONIC_USEC=%d", uint64(nanotime())/1e3 /* microseconds in nanoseconds */)
|
||||
}
|
||||
status += "\nSTATUS=" + strings.TrimSuffix(status, "=1")
|
||||
return Send(status)
|
||||
|
|
Loading…
Reference in a new issue
Maybe in IR SIGHUP is so fast that we send the same `MONOTONIC_USEC`? This would've explained the problem.

Receiving the same `MONOTONIC_USEC` doesn't seem like a problem to `systemd`. I tried sending `time.Since(time.Time{})` (always the same value); `systemd` is OK with it, and the service reload is successful.

It appears as if `systemd` does not accept values less than some minimum or greater than some maximum, and the time in us since start is deemed too small. Time in ns since start works fine most of the time, but not always (I've had hang-ups a couple of times). I haven't found those min and max values in the systemd source code yet.

Passing `math.MaxInt64` as `MONOTONIC_USEC` works fine, but `math.MaxUint64` is not accepted.