ir: Add health status reporting on reconfiguration #1311
9 changed files with 55 additions and 18 deletions
|
@ -7,6 +7,7 @@ import (
|
|||
|
||||
configViper "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/internal/common/config"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
|
||||
control "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/control/ir"
|
||||
"github.com/spf13/viper"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
@ -81,6 +82,10 @@ func watchForSignal(cancel func()) {
|
|||
return
|
||||
case <-sighupCh:
|
||||
log.Info(logs.FrostFSNodeSIGHUPHasBeenReceivedRereadingConfiguration)
|
||||
if !innerRing.CompareAndSwapHealthStatus(control.HealthStatus_READY, control.HealthStatus_RECONFIGURING) {
|
||||
log.Info(logs.FrostFSNodeSIGHUPSkip)
|
||||
break
|
||||
}
|
||||
err := reloadConfig()
|
||||
if err != nil {
|
||||
log.Error(logs.FrostFSNodeConfigurationReading, zap.Error(err))
|
||||
|
@ -92,6 +97,7 @@ func watchForSignal(cancel func()) {
|
|||
if err != nil {
|
||||
log.Error(logs.FrostFSNodeConfigurationReading, zap.Error(err))
|
||||
}
|
||||
innerRing.CompareAndSwapHealthStatus(control.HealthStatus_RECONFIGURING, control.HealthStatus_READY)
|
||||
log.Info(logs.FrostFSNodeConfigurationHasBeenReloadedSuccessfully)
|
||||
}
|
||||
}
|
||||
|
|
6
go.mod
6
go.mod
|
@ -59,7 +59,7 @@ require (
|
|||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/bits-and-blooms/bitset v1.13.0 // indirect
|
||||
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/consensys/bavard v0.1.13 // indirect
|
||||
github.com/consensys/gnark-crypto v0.12.2-0.20231222162921-eb75782795d2 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
|
||||
|
@ -120,8 +120,8 @@ require (
|
|||
golang.org/x/crypto v0.21.0 // indirect
|
||||
golang.org/x/net v0.23.0 // indirect
|
||||
golang.org/x/text v0.16.0 // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240311132316-a219d84964c2 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240314234333-6e1732d8331c // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 // indirect
|
||||
gopkg.in/ini.v1 v1.67.0 // indirect
|
||||
lukechampine.com/blake3 v1.2.1 // indirect
|
||||
rsc.io/tmplfunc v0.0.3 // indirect
|
||||
|
|
12
go.sum
12
go.sum
|
@ -33,8 +33,8 @@ github.com/bits-and-blooms/bitset v1.13.0 h1:bAQ9OPNFYbGHV6Nez0tmNI0RiEu7/hxlYJR
|
|||
github.com/bits-and-blooms/bitset v1.13.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
|
||||
github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=
|
||||
github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
|
||||
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
|
||||
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/cheggaaa/pb v1.0.29 h1:FckUN5ngEk2LpvuG0fw1GEFx6LtyY2pWI/Z2QgCnEYo=
|
||||
github.com/cheggaaa/pb v1.0.29/go.mod h1:W40334L7FMC5JKWldsTWbdGjLo0RxUKK73K+TuPxX30=
|
||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||
|
@ -380,10 +380,10 @@ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T
|
|||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20220517211312-f3a8303e98df/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240311132316-a219d84964c2 h1:rIo7ocm2roD9DcFIX67Ym8icoGCKSARAiPljFhh5suQ=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240311132316-a219d84964c2/go.mod h1:O1cOfN1Cy6QEYr7VxtjOyP5AdAuR0aJ/MYZaaof623Y=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240314234333-6e1732d8331c h1:lfpJ/2rWPa/kJgxyyXM8PrNnfCzcmxJ265mADgwmvLI=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240314234333-6e1732d8331c/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 h1:RFiFrvy37/mpSpdySBDrUdipW/dHwsRwh3J3+A9VgT4=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237/go.mod h1:Z5Iiy3jtmioajWHDGFk7CeugTyHtPvMHA4UTmUkyalE=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 h1:NnYq6UN9ReLM9/Y01KWNOWyI5xQ9kbIms5GGJVwS/Yc=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY=
|
||||
google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM=
|
||||
google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA=
|
||||
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
||||
|
|
|
@ -161,6 +161,16 @@ func (s *Server) setHealthStatus(hs control.HealthStatus) {
|
|||
}
|
||||
}
|
||||
|
||||
func (s *Server) CompareAndSwapHealthStatus(oldSt, newSt control.HealthStatus) (swapped bool) {
|
||||
if swapped = s.healthStatus.CompareAndSwap(int32(oldSt), int32(newSt)); swapped {
|
||||
s.notifySystemd(newSt)
|
||||
if s.irMetrics != nil {
|
||||
s.irMetrics.SetHealth(int32(newSt))
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// HealthStatus returns the current health status of the IR application.
|
||||
func (s *Server) HealthStatus() control.HealthStatus {
|
||||
return control.HealthStatus(s.healthStatus.Load())
|
||||
|
@ -186,6 +196,8 @@ func (s *Server) notifySystemd(st control.HealthStatus) {
|
|||
err = sdnotify.FlagAndStatus(sdnotify.ReadyEnabled)
|
||||
case control.HealthStatus_SHUTTING_DOWN:
|
||||
err = sdnotify.FlagAndStatus(sdnotify.StoppingEnabled)
|
||||
case control.HealthStatus_RECONFIGURING:
|
||||
err = sdnotify.FlagAndStatus(sdnotify.ReloadingEnabled)
|
||||
default:
|
||||
err = sdnotify.Status(fmt.Sprintf("%v", st))
|
||||
}
|
||||
|
|
18
pkg/services/control/ir/types.pb.go
generated
18
pkg/services/control/ir/types.pb.go
generated
|
@ -32,6 +32,8 @@ const (
|
|||
HealthStatus_READY HealthStatus = 2
|
||||
// IR application is shutting down.
|
||||
HealthStatus_SHUTTING_DOWN HealthStatus = 3
|
||||
// IR application is reconfiguring.
|
||||
HealthStatus_RECONFIGURING HealthStatus = 4
|
||||
)
|
||||
|
||||
// Enum value maps for HealthStatus.
|
||||
|
@ -41,12 +43,14 @@ var (
|
|||
1: "STARTING",
|
||||
2: "READY",
|
||||
3: "SHUTTING_DOWN",
|
||||
4: "RECONFIGURING",
|
||||
}
|
||||
HealthStatus_value = map[string]int32{
|
||||
"HEALTH_STATUS_UNDEFINED": 0,
|
||||
"STARTING": 1,
|
||||
"READY": 2,
|
||||
"SHUTTING_DOWN": 3,
|
||||
"RECONFIGURING": 4,
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -144,17 +148,19 @@ var file_pkg_services_control_ir_types_proto_rawDesc = []byte{
|
|||
0x22, 0x36, 0x0a, 0x09, 0x53, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x12, 0x10, 0x0a,
|
||||
0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12,
|
||||
0x17, 0x0a, 0x04, 0x73, 0x69, 0x67, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x09, 0x73,
|
||||
0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x2a, 0x57, 0x0a, 0x0c, 0x48, 0x65, 0x61, 0x6c,
|
||||
0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x2a, 0x6a, 0x0a, 0x0c, 0x48, 0x65, 0x61, 0x6c,
|
||||
0x74, 0x68, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1b, 0x0a, 0x17, 0x48, 0x45, 0x41, 0x4c,
|
||||
0x54, 0x48, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49,
|
||||
0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x54, 0x41, 0x52, 0x54, 0x49, 0x4e,
|
||||
0x47, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x11,
|
||||
0x0a, 0x0d, 0x53, 0x48, 0x55, 0x54, 0x54, 0x49, 0x4e, 0x47, 0x5f, 0x44, 0x4f, 0x57, 0x4e, 0x10,
|
||||
0x03, 0x42, 0x44, 0x5a, 0x42, 0x67, 0x69, 0x74, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x66, 0x73,
|
||||
0x2e, 0x69, 0x6e, 0x66, 0x6f, 0x2f, 0x54, 0x72, 0x75, 0x65, 0x43, 0x6c, 0x6f, 0x75, 0x64, 0x4c,
|
||||
0x61, 0x62, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x66, 0x73, 0x2d, 0x6e, 0x6f, 0x64, 0x65, 0x2f,
|
||||
0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x2f, 0x69, 0x72, 0x2f,
|
||||
0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
|
||||
0x03, 0x12, 0x11, 0x0a, 0x0d, 0x52, 0x45, 0x43, 0x4f, 0x4e, 0x46, 0x49, 0x47, 0x55, 0x52, 0x49,
|
||||
0x4e, 0x47, 0x10, 0x04, 0x42, 0x44, 0x5a, 0x42, 0x67, 0x69, 0x74, 0x2e, 0x66, 0x72, 0x6f, 0x73,
|
||||
0x74, 0x66, 0x73, 0x2e, 0x69, 0x6e, 0x66, 0x6f, 0x2f, 0x54, 0x72, 0x75, 0x65, 0x43, 0x6c, 0x6f,
|
||||
0x75, 0x64, 0x4c, 0x61, 0x62, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x66, 0x73, 0x2d, 0x6e, 0x6f,
|
||||
0x64, 0x65, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x2f,
|
||||
0x69, 0x72, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74,
|
||||
0x6f, 0x33,
|
||||
}
|
||||
|
||||
var (
|
||||
|
|
|
@ -26,4 +26,7 @@ enum HealthStatus {
|
|||
|
||||
// IR application is shutting down.
|
||||
SHUTTING_DOWN = 3;
|
||||
|
||||
// IR application is reconfiguring.
|
||||
RECONFIGURING = 4;
|
||||
}
|
||||
|
|
10
pkg/util/sdnotify/clock.go
Normal file
10
pkg/util/sdnotify/clock.go
Normal file
|
@ -0,0 +1,10 @@
|
|||
package sdnotify
|
||||
|
||||
import (
|
||||
// For go:linkname to work.
|
||||
_ "unsafe"
|
||||
)
|
||||
|
||||
//go:noescape
|
||||
//go:linkname nanotime runtime.nanotime
|
||||
func nanotime() int64
|
2
pkg/util/sdnotify/clock.s
Normal file
2
pkg/util/sdnotify/clock.s
Normal file
|
@ -0,0 +1,2 @@
|
|||
// The file is intentionally empty.
|
||||
// It is a workaround for https://github.com/golang/go/issues/15006
|
|
@ -6,7 +6,6 @@ import (
|
|||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -17,7 +16,6 @@ const (
|
|||
|
||||
var (
|
||||
socket *net.UnixAddr
|
||||
start = time.Now()
|
||||
|
||||
errSocketVariableIsNotPresent = errors.New("\"NOTIFY_SOCKET\" environment variable is not present")
|
||||
errSocketIsNotInitialized = errors.New("socket is not initialized")
|
||||
|
@ -53,7 +51,7 @@ func FlagAndStatus(status string) error {
|
|||
// must be sent, containing "READY=1".
|
||||
//
|
||||
// For MONOTONIC_USEC format refer to https://www.man7.org/linux/man-pages/man3/sd_notify.3.html
|
||||
fyrchik marked this conversation as resolved
|
||||
status += fmt.Sprintf("\nMONOTONIC_USEC=%d", uint64(time.Since(start))/1e3 /* microseconds in nanoseconds */)
|
||||
status += fmt.Sprintf("\nMONOTONIC_USEC=%d", uint64(nanotime())/1e3 /* microseconds in nanoseconds */)
|
||||
}
|
||||
status += "\nSTATUS=" + strings.TrimSuffix(status, "=1")
|
||||
return Send(status)
|
||||
|
|
Loading…
Add table
Reference in a new issue
Maybe in IR SIGHUP is so fast that we send the same
MONOTONIC_USEC
? This would've explained the problem.Receiving the same
MONOTONIC_USEC
doesn't seem like a problem tosystemd
. I tried sendingtime.Since(time.Time{})
(always the same value),systemd
is OK with it, service reload is successful.It appears as if
systemd
does not accept values less than some minimum and greater than some maximum, and time in us since start is deemed as being too small. Time in ns since start works fine most of the time but not always (i've got hang-ups a couple times). Haven't found those min & max values in systemd source code yet.Passing a
math.MaxInt64
asMONOTONIC_USEC
works fine butmath.MaxUint64
is not accepted.