forked from TrueCloudLab/frostfs-node
[#1465] node: Prevent process from killing by systemd when shutting down
Signed-off-by: Anton Nikiforov <an.nikiforov@yadro.com>
commit 85cf1f47ac
parent 362f24953a
5 changed files with 104 additions and 74 deletions
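Context for the change: `systemctl stop` delivers SIGTERM and, if the process has not exited by TimeoutStopSec, escalates to SIGKILL. Before this commit the node trapped only SIGHUP, so SIGTERM hit Go's default handler and terminated the process immediately, with no graceful cleanup. A minimal, self-contained sketch of the pattern the commit adopts (signal.NotifyContext); all names below are illustrative, not taken from the node code:

    package main

    import (
    	"context"
    	"log"
    	"os/signal"
    	"syscall"
    )

    func main() {
    	// ctx is canceled on the first SIGINT or SIGTERM; `systemctl stop`
    	// delivers SIGTERM, so cancellation is the shutdown trigger.
    	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
    	defer stop()

    	log.Println("running; press Ctrl+C or send SIGTERM to stop")
    	<-ctx.Done()

    	// Graceful phase: close listeners, flush storage, etc., then return
    	// normally so systemd never has to escalate to SIGKILL.
    	log.Println("shutting down gracefully")
    }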
@@ -25,6 +25,7 @@ Changelog for FrostFS Node
 - Correct status error for expired session token (#2207)
 - Set flag `mode` required for `frostfs-cli control shards set-mode` (#8)
 - Fix `dirty` suffix in debian package version (#53)
+- Prevent node process from killing by systemd when shutting down (#1465)
 
 ### Removed
 ### Updated

@@ -911,60 +911,91 @@ type dCfg struct {
 	}
 }
 
-func (c *cfg) configWatcher(ctx context.Context) {
+func (c *cfg) signalWatcher() {
 	ch := make(chan os.Signal, 1)
-	signal.Notify(ch, syscall.SIGHUP)
+	signal.Notify(ch, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)
 
 	for {
 		select {
-		case <-ch:
-			c.log.Info("SIGHUP has been received, rereading configuration...")
+		case sig := <-ch:
+			switch sig {
+			case syscall.SIGHUP:
+				c.reloadConfig()
+			case syscall.SIGTERM, syscall.SIGINT:
+				c.log.Info("termination signal has been received, stopping...")
+				// TODO (@acid-ant): #49 need to cover case when stuck at the middle(node health UNDEFINED or STARTING)
 
-			err := c.readConfig(c.appCfg)
-			if err != nil {
-				c.log.Error("configuration reading", zap.Error(err))
-				continue
+				c.shutdown()
+
+				c.log.Info("termination signal processing is complete")
+				return
 			}
+		case err := <-c.internalErr: // internal application error
+			c.log.Warn("internal application error",
+				zap.String("message", err.Error()))
 
-			// all the components are expected to support
-			// Logger's dynamic reconfiguration approach
-			var components []dCfg
+			c.shutdown()
 
-			// Logger
-
-			logPrm, err := c.loggerPrm()
-			if err != nil {
-				c.log.Error("logger configuration preparation", zap.Error(err))
-				continue
-			}
-
-			components = append(components, dCfg{name: "logger", cfg: logPrm})
-
-			// Storage Engine
-
-			var rcfg engine.ReConfiguration
-			for _, optsWithID := range c.shardOpts() {
-				rcfg.AddShard(optsWithID.configID, optsWithID.shOpts)
-			}
-
-			err = c.cfgObject.cfgLocalStorage.localStorage.Reload(rcfg)
-			if err != nil {
-				c.log.Error("storage engine configuration update", zap.Error(err))
-				continue
-			}
-
-			for _, component := range components {
-				err = component.cfg.Reload()
-				if err != nil {
-					c.log.Error("updated configuration applying",
-						zap.String("component", component.name),
-						zap.Error(err))
-				}
-			}
-
-			c.log.Info("configuration has been reloaded successfully")
-		case <-ctx.Done():
+			c.log.Info("internal error processing is complete")
 			return
 		}
 	}
 }
+
+func (c *cfg) reloadConfig() {
+	c.log.Info("SIGHUP has been received, rereading configuration...")
+
+	err := c.readConfig(c.appCfg)
+	if err != nil {
+		c.log.Error("configuration reading", zap.Error(err))
+		return
+	}
+
+	// all the components are expected to support
+	// Logger's dynamic reconfiguration approach
+	var components []dCfg
+
+	// Logger
+
+	logPrm, err := c.loggerPrm()
+	if err != nil {
+		c.log.Error("logger configuration preparation", zap.Error(err))
+		return
+	}
+
+	components = append(components, dCfg{name: "logger", cfg: logPrm})
+
+	// Storage Engine
+
+	var rcfg engine.ReConfiguration
+	for _, optsWithID := range c.shardOpts() {
+		rcfg.AddShard(optsWithID.configID, optsWithID.shOpts)
+	}
+
+	err = c.cfgObject.cfgLocalStorage.localStorage.Reload(rcfg)
+	if err != nil {
+		c.log.Error("storage engine configuration update", zap.Error(err))
+		return
+	}
+
+	for _, component := range components {
+		err = component.cfg.Reload()
+		if err != nil {
+			c.log.Error("updated configuration applying",
+				zap.String("component", component.name),
+				zap.Error(err))
+		}
+	}
+
+	c.log.Info("configuration has been reloaded successfully")
+}
+
+func (c *cfg) shutdown() {
+	c.setHealthStatus(control.HealthStatus_SHUTTING_DOWN)
+
+	c.ctxCancel()
+	for i := range c.closers {
+		c.closers[len(c.closers)-1-i]()
+	}
+	close(c.internalErr)
+}

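A detail in shutdown() above that is easy to miss: c.closers is executed back to front, so components stop in reverse start-up order and nothing is torn down before its dependents. A tiny standalone illustration of that indexing (the component names are invented for the example):

    package main

    import "fmt"

    func main() {
    	var closers []func()
    	for _, name := range []string{"storage engine", "gRPC server", "tree service"} {
    		name := name // capture the loop variable (pre-Go 1.22 idiom)
    		closers = append(closers, func() { fmt.Println("closed:", name) })
    	}

    	// Same traversal as c.closers[len(c.closers)-1-i]() in shutdown():
    	// the last component started is the first one closed.
    	for i := range closers {
    		closers[len(closers)-1-i]()
    	}
    }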
@@ -6,8 +6,6 @@ import (
 	"fmt"
-	"log"
-	"os"
 	"os/signal"
 	"syscall"
 
 	"github.com/TrueCloudLab/frostfs-node/cmd/frostfs-node/config"
 	"github.com/TrueCloudLab/frostfs-node/misc"

@@ -66,10 +64,6 @@ func main() {
 	c.setHealthStatus(control.HealthStatus_READY)
 
 	wait(c)
-
-	c.setHealthStatus(control.HealthStatus_SHUTTING_DOWN)
-
-	shutdown(c)
 }
 
 func initAndLog(c *cfg, name string, initializer func(*cfg)) {

@@ -79,9 +73,18 @@ func initAndLog(c *cfg, name string, initializer func(*cfg)) {
 }
 
 func initApp(c *cfg) {
-	initLocalStorage(c)
-	c.ctx, c.ctxCancel = context.WithCancel(context.Background())
+	c.ctx, c.ctxCancel = signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
+	c.wg.Add(1)
+	go func() {
+		c.signalWatcher()
+		c.wg.Done()
+	}()
+
+	initAndLog(c, "pprof", initProfiler)
+	initAndLog(c, "prometheus", initMetrics)
+
+	initLocalStorage(c)
 
 	initAndLog(c, "storage engine", func(c *cfg) {
 		fatalOnErr(c.cfgObject.cfgLocalStorage.localStorage.Open())

@@ -96,14 +99,10 @@ func initApp(c *cfg) {
 	initAndLog(c, "reputation", initReputationService)
 	initAndLog(c, "notification", initNotifications)
 	initAndLog(c, "object", initObjectService)
-	initAndLog(c, "pprof", initProfiler)
-	initAndLog(c, "prometheus", initMetrics)
 	initAndLog(c, "tree", initTreeService)
 	initAndLog(c, "control", initControlService)
 
 	initAndLog(c, "morph notifications", listenMorphNotifications)
-
-	c.workers = append(c.workers, newWorkerFromFunc(c.configWatcher))
 }
 
 func runAndLog(c *cfg, name string, logSuccess bool, starter func(*cfg)) {

@@ -128,21 +127,7 @@ func wait(c *cfg) {
 	c.log.Info("application started",
 		zap.String("version", misc.Version))
 
-	select {
-	case <-c.ctx.Done(): // graceful shutdown
-	case err := <-c.internalErr: // internal application error
-		close(c.internalErr)
-		c.ctxCancel()
-
-		c.log.Warn("internal application error",
-			zap.String("message", err.Error()))
-	}
-}
-
-func shutdown(c *cfg) {
-	for _, closer := range c.closers {
-		closer()
-	}
+	<-c.ctx.Done() // graceful shutdown
 
+	c.log.Debug("waiting for all processes to stop")

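With these changes main() blocks in wait() until the signal watcher cancels the context, and the c.wg bookkeeping added in initApp lets the process wait for the watcher goroutine to finish its cleanup before exiting. A condensed sketch of that handoff, assuming simplified stand-ins for the node's cfg plumbing:

    package main

    import (
    	"context"
    	"log"
    	"os/signal"
    	"sync"
    	"syscall"
    )

    func main() {
    	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
    	defer cancel()

    	var wg sync.WaitGroup
    	wg.Add(1)
    	go func() { // stands in for c.signalWatcher()
    		defer wg.Done()
    		<-ctx.Done()
    		log.Println("watcher: running closers")
    	}()

    	log.Println("application started")
    	<-ctx.Done() // graceful shutdown, as in the new wait()

    	log.Println("waiting for all processes to stop")
    	wg.Wait() // main returns only after the watcher has cleaned up
    }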
@@ -126,7 +126,20 @@ func (c *Client) notificationLoop() {
 				continue
 			}
 
-			c.notifications <- n
+			select {
+			case c.notifications <- n:
+				continue
+			case <-c.cfg.ctx.Done():
+				_ = c.UnsubscribeAll()
+				c.close()
+
+				return
+			case <-c.closeChan:
+				_ = c.UnsubscribeAll()
+				c.close()
+
+				return
+			}
 		}
 	}
 }

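The key fix above: an unconditional `c.notifications <- n` blocks forever once nobody is receiving, which is exactly what kept the old process hanging during shutdown; wrapping the send in a select makes it abortable. A self-contained sketch of the pattern with invented channel names:

    package main

    import "fmt"

    // produce sends values until told to stop; the select keeps the send
    // from blocking forever once the consumer is gone.
    func produce(out chan<- int, done <-chan struct{}, stopped chan<- struct{}) {
    	defer close(stopped)
    	for n := 0; ; n++ {
    		select {
    		case out <- n: // delivered; keep looping
    		case <-done: // consumer is gone: abort instead of hanging
    			return
    		}
    	}
    }

    func main() {
    	out := make(chan int) // unbuffered: a send blocks until received
    	done := make(chan struct{})
    	stopped := make(chan struct{})

    	go produce(out, done, stopped)
    	fmt.Println("got:", <-out, <-out)

    	close(done) // plays the role of closeChan / ctx.Done() above
    	<-stopped   // the producer exited instead of hanging on `out <- n`
    	fmt.Println("producer stopped cleanly")
    }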
@@ -14,7 +14,7 @@ func (c *Client) Close() {
 	// closing should be done via the channel
 	// to prevent switching to another RPC node
 	// in the notification loop
-	c.closeChan <- struct{}{}
+	close(c.closeChan)
 }
 
 // SubscribeForExecutionNotifications adds subscription for notifications

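The Close() change is subtle: sending struct{}{} requires the notification loop to be sitting at its receive, so Close() could block forever if the loop had already returned; close(c.closeChan) never blocks, and every pending and future receive on the channel proceeds immediately. A toy demonstration (nothing here comes from the node code):

    package main

    import (
    	"fmt"
    	"sync"
    )

    func main() {
    	closeCh := make(chan struct{})

    	var wg sync.WaitGroup
    	for i := 0; i < 3; i++ { // several loops may watch the same channel
    		wg.Add(1)
    		go func(id int) {
    			defer wg.Done()
    			<-closeCh // a receive on a closed channel returns immediately
    			fmt.Println("loop", id, "stopped")
    		}(i)
    	}

    	// A plain `closeCh <- struct{}{}` would wake exactly one receiver and
    	// would block forever if none were listening; close() wakes all three
    	// and never blocks.
    	close(closeCh)
    	wg.Wait()
    }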