Compare commits

..

1 commit

Author SHA1 Message Date
e1595e385b [#502] Add Dropped logs (by sampling) metric
All checks were successful
/ DCO (pull_request) Successful in 2m9s
/ Vulncheck (pull_request) Successful in 2m15s
/ Builds (pull_request) Successful in 2m2s
/ Lint (pull_request) Successful in 3m9s
/ Tests (pull_request) Successful in 2m4s
Signed-off-by: Pavel Pogodaev <p.pogodaev@yadro.com>
2024-11-28 11:26:52 +03:00
5 changed files with 63 additions and 26 deletions

View file

@ -163,6 +163,7 @@ func (a *App) init(ctx context.Context) {
a.initPolicyStorage(ctx)
a.initAPI(ctx)
a.initMetrics()
a.initLogger()
a.initServers(ctx)
a.initTracing(ctx)
}
@ -523,6 +524,11 @@ func (a *App) initMetrics() {
a.metrics.State().SetHealth(metrics.HealthStatusStarting)
}
// initLogger replaces a.log with a new zap logger built on top of the current
// logger's core after that core has been passed through
// applyZapCoreMiddlewares together with the application config and metrics.
// The new logger records stack traces for entries at fatal level and above.
func (a *App) initLogger() {
	stacktraceLevel := zap.NewAtomicLevelAt(zap.FatalLevel)
	wrappedCore := applyZapCoreMiddlewares(a.log.Core(), a.cfg, a.metrics)

	a.log = zap.New(wrappedCore, zap.AddStacktrace(stacktraceLevel))
}
func (a *App) initFrostfsID(ctx context.Context) {
cli, err := ffidcontract.New(ctx, ffidcontract.Config{
RPCAddress: a.cfg.GetString(cfgRPCEndpoint),

View file

@ -18,6 +18,7 @@ import (
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs"
internalnet "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/net"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/version"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/metrics"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/netmap"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pool"
"git.frostfs.info/TrueCloudLab/zapjournald"
@ -1104,9 +1105,9 @@ func pickLogger(v *viper.Viper) *Logger {
switch dest {
case destinationStdout:
return newStdoutLogger(v, lvl)
return newStdoutLogger(lvl)
case destinationJournald:
return newJournaldLogger(v, lvl)
return newJournaldLogger(lvl)
default:
panic(fmt.Sprintf("wrong destination for logger: %s", dest))
}
@ -1126,21 +1127,19 @@ func pickLogger(v *viper.Viper) *Logger {
// Logger records a stack trace for all messages at or above fatal level.
//
// See also zapcore.Level, zap.NewProductionConfig, zap.AddStacktrace.
func newStdoutLogger(v *viper.Viper, lvl zapcore.Level) *Logger {
func newStdoutLogger(lvl zapcore.Level) *Logger {
stdout := zapcore.AddSync(os.Stderr)
level := zap.NewAtomicLevelAt(lvl)
consoleOutCore := zapcore.NewCore(newLogEncoder(), stdout, level)
consoleOutCore = samplingEnabling(v, consoleOutCore)
return &Logger{
logger: zap.New(consoleOutCore, zap.AddStacktrace(zap.NewAtomicLevelAt(zap.FatalLevel))),
lvl: level,
}
}
func newJournaldLogger(v *viper.Viper, lvl zapcore.Level) *Logger {
func newJournaldLogger(lvl zapcore.Level) *Logger {
level := zap.NewAtomicLevelAt(lvl)
encoder := zapjournald.NewPartialEncoder(newLogEncoder(), zapjournald.SyslogFields)
@ -1152,8 +1151,6 @@ func newJournaldLogger(v *viper.Viper, lvl zapcore.Level) *Logger {
zapjournald.SyslogPid(),
})
coreWithContext = samplingEnabling(v, coreWithContext)
l := zap.New(coreWithContext, zap.AddStacktrace(zap.NewAtomicLevelAt(zap.FatalLevel)))
return &Logger{
@ -1169,24 +1166,6 @@ func newLogEncoder() zapcore.Encoder {
return zapcore.NewConsoleEncoder(c)
}
// samplingEnabling returns core wrapped in a zap sampler when
// cfgLoggerSamplingEnabled is set in v, and core itself otherwise.
//
// The sampler is configured from cfgLoggerSamplingInterval,
// cfgLoggerSamplingInitial and cfgLoggerSamplingThereafter. Per the zap
// documentation, within each interval the first "initial" entries sharing a
// level and message are logged; after that only every "thereafter"-th such
// entry is logged and the rest are dropped.
func samplingEnabling(v *viper.Viper, core zapcore.Core) zapcore.Core {
	if !v.GetBool(cfgLoggerSamplingEnabled) {
		return core
	}

	interval := v.GetDuration(cfgLoggerSamplingInterval)
	initial := v.GetInt(cfgLoggerSamplingInitial)
	thereafter := v.GetInt(cfgLoggerSamplingThereafter)

	return zapcore.NewSamplerWithOptions(core, interval, initial, thereafter)
}
func getLogLevel(v *viper.Viper) (zapcore.Level, error) {
var lvl zapcore.Level
lvlStr := v.GetString(cfgLoggerLevel)
@ -1221,3 +1200,19 @@ LOOP:
}
return validDomains
}
// applyZapCoreMiddlewares decorates core according to the logger settings in v.
//
// When cfgLoggerSamplingEnabled is false, core is returned untouched.
// Otherwise core is wrapped in a zap sampler configured from
// cfgLoggerSamplingInterval, cfgLoggerSamplingInitial and
// cfgLoggerSamplingThereafter. A sampler hook calls
// appMetrics.DroppedLogsInc for every entry whose sampling decision has the
// zapcore.LogDropped bit set.
func applyZapCoreMiddlewares(core zapcore.Core, v *viper.Viper, appMetrics *metrics.AppMetrics) zapcore.Core {
	if !v.GetBool(cfgLoggerSamplingEnabled) {
		return core
	}

	// SamplingDecision is a bit field, so test the flag rather than compare for equality.
	countDropped := func(_ zapcore.Entry, decision zapcore.SamplingDecision) {
		if decision&zapcore.LogDropped != 0 {
			appMetrics.DroppedLogsInc()
		}
	}

	return zapcore.NewSamplerWithOptions(
		core,
		v.GetDuration(cfgLoggerSamplingInterval),
		v.GetInt(cfgLoggerSamplingInitial),
		v.GetInt(cfgLoggerSamplingThereafter),
		zapcore.SamplerHook(countDropped),
	)
}

View file

@ -42,6 +42,14 @@ func NewAppMetrics(cfg AppMetricsConfig) *AppMetrics {
}
}
// DroppedLogsInc forwards to m.gate.Stats.DroppedLogsInc when metrics are
// enabled; it does nothing while they are disabled.
func (m *AppMetrics) DroppedLogsInc() {
	if m.isEnabled() {
		m.gate.Stats.DroppedLogsInc()
	}
}
func (m *AppMetrics) SetEnabled(enabled bool) {
if !enabled {
m.logger.Warn(logs.MetricsAreDisabled)

View file

@ -93,6 +93,13 @@ var appMetricsDesc = map[string]map[string]Description{
},
},
statisticSubsystem: {
droppedLogs: Description{
Type: dto.MetricType_COUNTER,
Namespace: namespace,
Subsystem: statisticSubsystem,
Name: droppedLogs,
Help: "Dropped logs (by sampling) count",
},
requestsSecondsMetric: Description{
Type: dto.MetricType_HISTOGRAM,
Namespace: namespace,
@ -252,3 +259,12 @@ func mustNewHistogramVec(description Description, buckets []float64) *prometheus
description.VariableLabels,
)
}
// mustNewCounter creates a Prometheus counter from description, using the
// options returned by newOpts converted to prometheus.CounterOpts.
//
// It panics with "invalid metric type" if description.Type is not
// dto.MetricType_COUNTER.
func mustNewCounter(description Description) prometheus.Counter {
	if description.Type != dto.MetricType_COUNTER {
		panic("invalid metric type")
	}

	counterOpts := prometheus.CounterOpts(newOpts(description))
	return prometheus.NewCounter(counterOpts)
}

View file

@ -34,6 +34,7 @@ type (
APIStatMetrics struct {
stats *httpStats
httpRequestsDuration *prometheus.HistogramVec
droppedLogs prometheus.Counter
}
)
@ -47,6 +48,7 @@ const (
requestsTotalMetric = "requests_total"
errorsTotalMetric = "errors_total"
bytesTotalMetric = "bytes_total"
droppedLogs = "dropped_logs"
)
const (
@ -61,6 +63,7 @@ func newAPIStatMetrics() *APIStatMetrics {
stats: newHTTPStats(),
httpRequestsDuration: mustNewHistogramVec(histogramDesc,
[]float64{.05, .1, .25, .5, 1, 2.5, 5, 10}),
droppedLogs: mustNewCounter(appMetricsDesc[statisticSubsystem][droppedLogs]),
}
}
@ -119,6 +122,7 @@ func (a *APIStatMetrics) Describe(ch chan<- *prometheus.Desc) {
return
}
a.stats.Describe(ch)
a.droppedLogs.Describe(ch)
a.httpRequestsDuration.Describe(ch)
}
@ -127,9 +131,17 @@ func (a *APIStatMetrics) Collect(ch chan<- prometheus.Metric) {
return
}
a.stats.Collect(ch)
a.droppedLogs.Collect(ch)
a.httpRequestsDuration.Collect(ch)
}
// DroppedLogsInc increments the droppedLogs counter.
// Calling it on a nil receiver is a no-op.
func (a *APIStatMetrics) DroppedLogsInc() {
	if a != nil {
		a.droppedLogs.Inc()
	}
}
func newHTTPStats() *httpStats {
return &httpStats{
currentS3RequestsDesc: newDesc(appMetricsDesc[statisticSubsystem][requestsCurrentMetric]),