2020-12-18 09:27:19 +00:00
|
|
|
package audit
|
|
|
|
|
2020-12-18 12:52:27 +00:00
|
|
|
import (
|
2020-12-21 08:40:30 +00:00
|
|
|
"context"
|
2022-05-12 16:37:46 +00:00
|
|
|
"crypto/sha256"
|
2020-12-21 08:40:30 +00:00
|
|
|
|
2023-04-12 14:35:10 +00:00
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
|
2023-03-07 13:38:26 +00:00
|
|
|
clientcore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/client"
|
|
|
|
netmapcore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/netmap"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/storagegroup"
|
|
|
|
cntClient "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/morph/client/container"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/audit"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/object_manager/placement"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/rand"
|
|
|
|
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/netmap"
|
|
|
|
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
2020-12-18 12:52:27 +00:00
|
|
|
"go.uber.org/zap"
|
|
|
|
)
|
|
|
|
|
2020-12-18 09:27:19 +00:00
|
|
|
func (ap *Processor) processStartAudit(epoch uint64) {
|
2020-12-21 08:40:30 +00:00
|
|
|
log := ap.log.With(zap.Uint64("epoch", epoch))
|
2020-12-18 12:52:27 +00:00
|
|
|
|
2020-12-22 00:28:42 +00:00
|
|
|
ap.prevAuditCanceler()
|
2020-12-22 12:34:38 +00:00
|
|
|
|
|
|
|
skipped := ap.taskManager.Reset()
|
|
|
|
if skipped > 0 {
|
2023-04-12 14:35:10 +00:00
|
|
|
ap.log.Info(logs.AuditSomeTasksFromPreviousEpochAreSkipped,
|
2020-12-22 12:34:38 +00:00
|
|
|
zap.Int("amount", skipped),
|
|
|
|
)
|
|
|
|
}
|
2020-12-22 00:28:42 +00:00
|
|
|
|
2020-12-18 12:52:27 +00:00
|
|
|
containers, err := ap.selectContainersToAudit(epoch)
|
|
|
|
if err != nil {
|
2023-04-12 14:35:10 +00:00
|
|
|
log.Error(logs.AuditContainerSelectionFailure, zap.String("error", err.Error()))
|
2020-12-21 08:40:30 +00:00
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-04-12 14:35:10 +00:00
|
|
|
log.Info(logs.AuditSelectContainersForAudit, zap.Int("amount", len(containers)))
|
2020-12-21 08:40:30 +00:00
|
|
|
|
|
|
|
nm, err := ap.netmapClient.GetNetMap(0)
|
|
|
|
if err != nil {
|
2023-04-12 14:35:10 +00:00
|
|
|
ap.log.Error(logs.AuditCantFetchNetworkMap,
|
2020-12-18 12:52:27 +00:00
|
|
|
zap.String("error", err.Error()))
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-04-06 08:36:25 +00:00
|
|
|
cancelChannel := make(chan struct{})
|
|
|
|
ap.prevAuditCanceler = func() {
|
|
|
|
select {
|
|
|
|
case <-cancelChannel: // already closed
|
|
|
|
default:
|
|
|
|
close(cancelChannel)
|
|
|
|
}
|
|
|
|
}
|
2020-12-24 14:40:29 +00:00
|
|
|
|
2022-05-12 16:37:46 +00:00
|
|
|
pivot := make([]byte, sha256.Size)
|
|
|
|
|
2023-04-06 08:36:25 +00:00
|
|
|
ap.startAuditTasksOnContainers(cancelChannel, containers, log, pivot, nm, epoch)
|
2023-03-29 14:33:49 +00:00
|
|
|
}
|
|
|
|
|
2023-04-06 08:36:25 +00:00
|
|
|
func (ap *Processor) startAuditTasksOnContainers(cancelChannel <-chan struct{}, containers []cid.ID, log *zap.Logger, pivot []byte, nm *netmap.NetMap, epoch uint64) {
|
2020-12-21 08:40:30 +00:00
|
|
|
for i := range containers {
|
2022-01-31 13:34:01 +00:00
|
|
|
cnr, err := cntClient.Get(ap.containerClient, containers[i]) // get container structure
|
2020-12-21 08:40:30 +00:00
|
|
|
if err != nil {
|
2023-04-12 14:35:10 +00:00
|
|
|
log.Error(logs.AuditCantGetContainerInfoIgnore,
|
2020-12-21 08:40:30 +00:00
|
|
|
zap.Stringer("cid", containers[i]),
|
|
|
|
zap.String("error", err.Error()))
|
|
|
|
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2022-05-12 16:37:46 +00:00
|
|
|
containers[i].Encode(pivot)
|
2021-01-26 16:57:03 +00:00
|
|
|
|
2020-12-21 08:40:30 +00:00
|
|
|
// find all container nodes for current epoch
|
2022-06-28 07:01:05 +00:00
|
|
|
nodes, err := nm.ContainerNodes(cnr.Value.PlacementPolicy(), pivot)
|
2020-12-21 08:40:30 +00:00
|
|
|
if err != nil {
|
2023-04-12 14:35:10 +00:00
|
|
|
log.Info(logs.AuditCantBuildPlacementForContainerIgnore,
|
2020-12-21 08:40:30 +00:00
|
|
|
zap.Stringer("cid", containers[i]),
|
|
|
|
zap.String("error", err.Error()))
|
|
|
|
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2022-06-08 23:18:26 +00:00
|
|
|
n := placement.FlattenNodes(nodes)
|
2020-12-23 15:01:03 +00:00
|
|
|
|
|
|
|
// shuffle nodes to ask a random one
|
2022-01-11 14:07:59 +00:00
|
|
|
rand.Shuffle(len(n), func(i, j int) {
|
2020-12-21 08:40:30 +00:00
|
|
|
n[i], n[j] = n[j], n[i]
|
|
|
|
})
|
|
|
|
|
|
|
|
// search storage groups
|
2022-05-19 19:40:22 +00:00
|
|
|
storageGroupsIDs := ap.findStorageGroups(containers[i], n)
|
2023-04-12 14:35:10 +00:00
|
|
|
log.Info(logs.AuditSelectStorageGroupsForAudit,
|
2022-05-19 19:40:22 +00:00
|
|
|
zap.Stringer("cid", containers[i]),
|
|
|
|
zap.Int("amount", len(storageGroupsIDs)))
|
|
|
|
|
|
|
|
// filter expired storage groups
|
|
|
|
storageGroups := ap.filterExpiredSG(containers[i], storageGroupsIDs, nodes, *nm)
|
2023-04-12 14:35:10 +00:00
|
|
|
log.Info(logs.AuditFilterExpiredStorageGroupsForAudit,
|
2020-12-21 08:40:30 +00:00
|
|
|
zap.Stringer("cid", containers[i]),
|
|
|
|
zap.Int("amount", len(storageGroups)))
|
|
|
|
|
2022-05-19 19:40:22 +00:00
|
|
|
// skip audit for containers without
|
|
|
|
// non-expired storage groups
|
|
|
|
if len(storageGroupsIDs) == 0 {
|
2021-07-06 18:12:10 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-12-22 00:28:42 +00:00
|
|
|
auditTask := new(audit.Task).
|
|
|
|
WithReporter(&epochAuditReporter{
|
|
|
|
epoch: epoch,
|
|
|
|
rep: ap.reporter,
|
|
|
|
}).
|
2023-04-06 08:36:25 +00:00
|
|
|
WithCancelChannel(cancelChannel).
|
2020-12-22 00:28:42 +00:00
|
|
|
WithContainerID(containers[i]).
|
|
|
|
WithStorageGroupList(storageGroups).
|
2022-06-22 10:55:31 +00:00
|
|
|
WithContainerStructure(cnr.Value).
|
2020-12-23 09:51:37 +00:00
|
|
|
WithContainerNodes(nodes).
|
|
|
|
WithNetworkMap(nm)
|
2020-12-22 00:28:42 +00:00
|
|
|
|
2023-04-06 08:36:25 +00:00
|
|
|
ap.taskManager.PushTask(auditTask)
|
2020-12-21 08:40:30 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-08 23:18:26 +00:00
|
|
|
func (ap *Processor) findStorageGroups(cnr cid.ID, shuffled netmapcore.Nodes) []oid.ID {
|
2022-05-31 17:00:41 +00:00
|
|
|
var sg []oid.ID
|
2020-12-21 08:40:30 +00:00
|
|
|
|
|
|
|
ln := len(shuffled)
|
|
|
|
|
2021-10-27 12:12:05 +00:00
|
|
|
var (
|
|
|
|
info clientcore.NodeInfo
|
2022-07-01 18:16:28 +00:00
|
|
|
prm storagegroup.SearchSGPrm
|
2021-10-27 12:12:05 +00:00
|
|
|
)
|
|
|
|
|
2022-07-01 18:16:28 +00:00
|
|
|
prm.Container = cnr
|
2021-09-28 05:32:30 +00:00
|
|
|
|
2020-12-21 08:40:30 +00:00
|
|
|
for i := range shuffled { // consider iterating over some part of container
|
|
|
|
log := ap.log.With(
|
2022-05-31 17:00:41 +00:00
|
|
|
zap.Stringer("cid", cnr),
|
2022-10-11 12:49:34 +00:00
|
|
|
zap.String("key", netmap.StringifyPublicKey(shuffled[0])),
|
2020-12-21 08:40:30 +00:00
|
|
|
zap.Int("try", i),
|
|
|
|
zap.Int("total_tries", ln),
|
|
|
|
)
|
|
|
|
|
2022-06-08 23:18:26 +00:00
|
|
|
err := clientcore.NodeInfoFromRawNetmapElement(&info, netmapcore.Node(shuffled[i]))
|
2020-12-21 08:40:30 +00:00
|
|
|
if err != nil {
|
2023-04-12 14:35:10 +00:00
|
|
|
log.Warn(logs.AuditParseClientNodeInfo, zap.String("error", err.Error()))
|
2020-12-23 08:45:36 +00:00
|
|
|
|
|
|
|
continue
|
2020-12-21 08:40:30 +00:00
|
|
|
}
|
|
|
|
|
2021-10-27 12:12:05 +00:00
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), ap.searchTimeout)
|
2020-12-21 08:40:30 +00:00
|
|
|
|
2022-07-01 18:16:28 +00:00
|
|
|
prm.NodeInfo = info
|
2020-12-21 08:40:30 +00:00
|
|
|
|
2022-07-01 18:16:28 +00:00
|
|
|
var dst storagegroup.SearchSGDst
|
2021-10-27 12:12:05 +00:00
|
|
|
|
2023-04-06 10:41:45 +00:00
|
|
|
err = ap.sgSrc.ListSG(ctx, &dst, prm)
|
2020-12-21 08:40:30 +00:00
|
|
|
|
|
|
|
cancel()
|
|
|
|
|
|
|
|
if err != nil {
|
2023-04-12 14:35:10 +00:00
|
|
|
log.Warn(logs.AuditErrorInStorageGroupSearch, zap.String("error", err.Error()))
|
2020-12-21 08:40:30 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2022-07-01 18:16:28 +00:00
|
|
|
sg = append(sg, dst.Objects...)
|
2020-12-21 08:40:30 +00:00
|
|
|
|
|
|
|
break // we found storage groups, so break loop
|
|
|
|
}
|
|
|
|
|
|
|
|
return sg
|
|
|
|
}
|
2022-05-19 19:40:22 +00:00
|
|
|
|
|
|
|
func (ap *Processor) filterExpiredSG(cid cid.ID, sgIDs []oid.ID,
|
2022-07-01 18:45:18 +00:00
|
|
|
cnr [][]netmap.NodeInfo, nm netmap.NetMap) []storagegroup.StorageGroup {
|
|
|
|
sgs := make([]storagegroup.StorageGroup, 0, len(sgIDs))
|
|
|
|
var coreSG storagegroup.StorageGroup
|
2022-05-19 19:40:22 +00:00
|
|
|
|
2022-07-01 18:16:28 +00:00
|
|
|
var getSGPrm storagegroup.GetSGPrm
|
2022-05-19 19:40:22 +00:00
|
|
|
getSGPrm.CID = cid
|
|
|
|
getSGPrm.Container = cnr
|
|
|
|
getSGPrm.NetMap = nm
|
|
|
|
|
|
|
|
for _, sgID := range sgIDs {
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), ap.searchTimeout)
|
|
|
|
|
|
|
|
getSGPrm.OID = sgID
|
|
|
|
|
2023-04-06 10:41:45 +00:00
|
|
|
sg, err := ap.sgSrc.GetSG(ctx, getSGPrm)
|
2022-05-19 19:40:22 +00:00
|
|
|
|
|
|
|
cancel()
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
ap.log.Error(
|
|
|
|
"could not get storage group object for audit, skipping",
|
|
|
|
zap.Stringer("cid", cid),
|
|
|
|
zap.Stringer("oid", sgID),
|
|
|
|
zap.Error(err),
|
|
|
|
)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// filter expired epochs
|
2022-09-05 05:48:42 +00:00
|
|
|
if sg.ExpirationEpoch() >= ap.epochSrc.EpochCounter() {
|
2022-07-01 18:45:18 +00:00
|
|
|
coreSG.SetID(sgID)
|
|
|
|
coreSG.SetStorageGroup(*sg)
|
|
|
|
|
|
|
|
sgs = append(sgs, coreSG)
|
2022-05-19 19:40:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return sgs
|
|
|
|
}
|