policer: add check for repeated error logging #914
1 changed files with 16 additions and 1 deletions
|
@ -7,6 +7,7 @@ import (
|
||||||
|
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
|
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/engine"
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/engine"
|
||||||
|
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -34,6 +35,9 @@ func (p *Policer) shardPolicyWorker(ctx context.Context) {
|
||||||
p.log.Warn(logs.PolicerFailureAtObjectSelectForReplication, zap.Error(err))
|
p.log.Warn(logs.PolicerFailureAtObjectSelectForReplication, zap.Error(err))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// contains all errors logged in this iteration for each container
|
||||||
|
cnrErrSkip := make(map[cid.ID][]error)
|
||||||
|
|
||||||
for i := range addrs {
|
for i := range addrs {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
|
@ -55,10 +59,12 @@ func (p *Policer) shardPolicyWorker(ctx context.Context) {
|
||||||
|
|
||||||
|
|||||||
if p.objsInWork.add(addr.Address) {
|
if p.objsInWork.add(addr.Address) {
|
||||||
err := p.processObject(ctx, addr)
|
err := p.processObject(ctx, addr)
|
||||||
if err != nil {
|
if err != nil && !skipLog(cnrErrSkip[addr.Address.Container()], err) {
|
||||||
p.log.Error(logs.PolicerUnableToProcessObj,
|
p.log.Error(logs.PolicerUnableToProcessObj,
|
||||||
zap.Stringer("object", addr.Address),
|
zap.Stringer("object", addr.Address),
|
||||||
zap.String("error", err.Error()))
|
zap.String("error", err.Error()))
|
||||||
|
|
||||||
|
cnrErrSkip[addr.Address.Container()] = append(cnrErrSkip[addr.Address.Container()], err)
|
||||||
}
|
}
|
||||||
p.cache.Add(addr.Address, time.Now())
|
p.cache.Add(addr.Address, time.Now())
|
||||||
p.objsInWork.remove(addr.Address)
|
p.objsInWork.remove(addr.Address)
|
||||||
|
@ -72,3 +78,12 @@ func (p *Policer) shardPolicyWorker(ctx context.Context) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
aarifullin
commented
I am sure this method works fine in your case :) And my change request is absolutely optional. But it works until incoming
and then
I am sure this method works fine in your case :) And my change request is absolutely optional. But it works until incoming `err` gets wrapped for some reason by another error. So, better to not compare them like strings, but make
```go
cnrErrSkip := make(map[cid.ID][]error)
```
and then
```go
import errors
/* ... */
func skipLog(errs []error, err err) bool {
for _, e := range errs {
if errors.Is(err, e) {
return true
}
}
return false
}
```
fyrchik
commented
Btw, another think to think about is that we can fail to connect to different nodes -- I would like to see this in logs. Btw, another think to think about is that we can fail to connect to different nodes -- I would like to see this in logs.
But it is probably not too important -- if a node is isolated, lot's of other errors will occur.
|
|||||||
|
|
||||||
|
func skipLog(errs []error, err error) bool {
|
||||||
fyrchik
commented
Are you sure this will work correctly? For pointer newly created pointer types (consider Are you sure this will work correctly? For pointer newly created pointer types (consider `fmt.Errorf`) we will have `false` here.
elebedeva
commented
fixed fixed
|
|||||||
|
for _, e := range errs {
|
||||||
|
if errors.Is(err, e) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue
processObject
can return errors not only from container build, but also because some nodes are unavailable.Can you give a list of errors (not full, just some groups) which we will ignore here?
It seems everything is ok, the last error is
return fmt.Errorf("%s: %w", logs.PolicerCouldNotBuildPlacementVectorForObject, err)
-- it will fail for all objects simultaneously, but some comment would still be nice, like "we can receive only 3 types of error, thus ignoring is safe".
Maybe even do this in the commit message.