Compare commits

...

3 commits

Author SHA1 Message Date
b20298007e [#741] treesvc: Refactor tree sync
Fix linter issues.
Add error logging.

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
2023-11-28 17:04:02 +03:00
1c735efedd [#741] treesvc: Do not update sync height if some node is unavailable
Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
2023-11-28 10:51:15 +03:00
b8a3f17759 [#741] treesvc: Remove unused height variables
Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
2023-11-28 10:12:39 +03:00
2 changed files with 71 additions and 58 deletions

View file

@ -64,6 +64,9 @@ const (
TreeCouldNotUpdateLastSynchronizedHeightForATree = "could not update last synchronized height for a tree"
TreeSynchronizeTree = "synchronize tree"
TreeFailedToRunTreeSynchronizationOverAllNodes = "failed to run tree synchronization over all nodes"
TreeFailedToRunTreeSynchronizationForSpecificNode = "failed to run tree synchronization for specific node"
TreeFailedToParseAddressForTreeSynchronization = "failed to parse address for tree synchronization"
TreeFailedToConnectForTreeSynchronization = "failed to connect for tree synchronization"
TreeSyncingTrees = "syncing trees..."
TreeCouldNotFetchContainers = "could not fetch containers"
TreeTreesHaveBeenSynchronized = "trees have been synchronized"

View file

@ -9,6 +9,7 @@ import (
"math"
"math/rand"
"sync"
"sync/atomic"
"time"
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
@ -216,45 +217,44 @@ func (s *Service) applyOperationStream(ctx context.Context, cid cid.ID, treeID s
}
func (s *Service) startStream(ctx context.Context, cid cid.ID, treeID string,
height uint64, treeClient TreeServiceClient, opsCh chan<- *pilorama.Move,
) (uint64, error) {
height uint64, cc *grpc.ClientConn, opsCh chan<- *pilorama.Move,
) error {
treeClient := NewTreeServiceClient(cc)
rawCID := make([]byte, sha256.Size)
cid.Encode(rawCID)
for {
newHeight := height
req := &GetOpLogRequest{
Body: &GetOpLogRequest_Body{
ContainerId: rawCID,
TreeId: treeID,
Height: newHeight,
},
}
if err := SignMessage(req, s.key); err != nil {
return 0, err
}
c, err := treeClient.GetOpLog(ctx, req)
if err != nil {
return 0, fmt.Errorf("can't initialize client: %w", err)
}
res, err := c.Recv()
for ; err == nil; res, err = c.Recv() {
lm := res.GetBody().GetOperation()
m := &pilorama.Move{
Parent: lm.ParentId,
Child: lm.ChildId,
}
if err := m.Meta.FromBytes(lm.Meta); err != nil {
return 0, err
}
opsCh <- m
}
if height == newHeight || err != nil && !errors.Is(err, io.EOF) {
return newHeight, err
}
height = newHeight
req := &GetOpLogRequest{
Body: &GetOpLogRequest_Body{
ContainerId: rawCID,
TreeId: treeID,
Height: height,
},
}
if err := SignMessage(req, s.key); err != nil {
return err
}
c, err := treeClient.GetOpLog(ctx, req)
if err != nil {
return fmt.Errorf("can't initialize client: %w", err)
}
res, err := c.Recv()
for ; err == nil; res, err = c.Recv() {
lm := res.GetBody().GetOperation()
m := &pilorama.Move{
Parent: lm.ParentId,
Child: lm.ChildId,
}
if err := m.Meta.FromBytes(lm.Meta); err != nil {
return err
}
opsCh <- m
}
if err != nil && !errors.Is(err, io.EOF) {
return err
}
return nil
}
// synchronizeTree synchronizes operations getting them from different nodes.
@ -287,50 +287,44 @@ func (s *Service) synchronizeTree(ctx context.Context, cid cid.ID, from uint64,
return nil
})
var allNodesSynced atomic.Bool
allNodesSynced.Store(true)
for i, n := range nodes {
i := i
n := n
errGroup.Go(func() error {
height := from
var nodeSynced bool
n.IterateNetworkEndpoints(func(addr string) bool {
var a network.Address
if err := a.FromString(addr); err != nil {
s.log.Warn(logs.TreeFailedToParseAddressForTreeSynchronization, zap.Error(err), zap.String("address", addr))
return false
}
cc, err := grpc.DialContext(egCtx, a.URIAddr(),
grpc.WithChainUnaryInterceptor(
metrics.NewUnaryClientInterceptor(),
tracing_grpc.NewUnaryClientInteceptor(),
),
grpc.WithChainStreamInterceptor(
metrics.NewStreamClientInterceptor(),
tracing_grpc.NewStreamClientInterceptor(),
),
grpc.WithTransportCredentials(insecure.NewCredentials()))
cc, err := s.dialCtx(egCtx, a)
if err != nil {
// Failed to connect, try the next address.
s.log.Warn(logs.TreeFailedToConnectForTreeSynchronization, zap.Error(err), zap.String("address", addr))
return false
}
defer cc.Close()
treeClient := NewTreeServiceClient(cc)
for {
h, err := s.startStream(egCtx, cid, treeID, from, treeClient, nodeOperationStreams[i])
if height < h {
height = h
}
if err != nil || h <= height {
// Error with the response, try the next node.
return true
}
err = s.startStream(egCtx, cid, treeID, from, cc, nodeOperationStreams[i])
if err != nil {
s.log.Warn(logs.TreeFailedToRunTreeSynchronizationForSpecificNode, zap.Error(err), zap.String("address", addr))
}
nodeSynced = err == nil
return true
})
close(nodeOperationStreams[i])
if !nodeSynced {
allNodesSynced.Store(false)
}
return nil
})
}
if err := errGroup.Wait(); err != nil {
allNodesSynced.Store(false)
s.log.Warn(logs.TreeFailedToRunTreeSynchronizationOverAllNodes, zap.Error(err))
}
@ -340,7 +334,23 @@ func (s *Service) synchronizeTree(ctx context.Context, cid cid.ID, from uint64,
} else {
newHeight++
}
return newHeight
if allNodesSynced.Load() {
return newHeight
}
return from
}
func (*Service) dialCtx(egCtx context.Context, a network.Address) (*grpc.ClientConn, error) {
return grpc.DialContext(egCtx, a.URIAddr(),
grpc.WithChainUnaryInterceptor(
metrics.NewUnaryClientInterceptor(),
tracing_grpc.NewUnaryClientInteceptor(),
),
grpc.WithChainStreamInterceptor(
metrics.NewStreamClientInterceptor(),
tracing_grpc.NewStreamClientInterceptor(),
),
grpc.WithTransportCredentials(insecure.NewCredentials()))
}
// ErrAlreadySyncing is returned when a service synchronization has already