forked from TrueCloudLab/frostfs-node

[#242] treesvc: Add tracing spans

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>

This commit is contained in: parent d62c6e4ce6 · commit 6121b541b5

13 changed files with 601 additions and 195 deletions
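Note on the pattern applied throughout the hunks below: derive a child span from the incoming context, attach identifying attributes, thread the returned context into every callee, and end the span when the operation finishes. A minimal sketch written against plain OpenTelemetry, under the assumption that the repository's tracing.StartSpanFromContext helper is a thin wrapper over these calls (handleOperation and doApply are made-up names):

package main

import (
    "context"

    "go.opentelemetry.io/otel"
    "go.opentelemetry.io/otel/attribute"
    "go.opentelemetry.io/otel/trace"
)

// doApply stands in for any callee, e.g. forest.TreeApply; it sees the
// span because it receives the context returned by Start.
func doApply(ctx context.Context) { _ = trace.SpanFromContext(ctx) }

func handleOperation(ctx context.Context, treeID, containerID string) {
    // Start a child span; the returned ctx carries it onward, which is
    // why this commit threads ctx through TreeMove, TreeApply, TreeGetMeta
    // and the rest of the forest calls.
    ctx, span := otel.Tracer("treesvc").Start(ctx, "TreeService.HandleReplicationOperation",
        trace.WithAttributes(
            attribute.String("tree_id", treeID),
            attribute.String("container_id", containerID),
        ))
    defer span.End()

    doApply(ctx)
}

func main() { handleOperation(context.Background(), "version-tree", "cnr-id") }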
@@ -7,8 +7,8 @@ import (
 )
 
 // DropTree drops a tree from the database. If treeID is empty, all the trees are dropped.
-func (s *Service) DropTree(_ context.Context, cid cid.ID, treeID string) error {
+func (s *Service) DropTree(ctx context.Context, cid cid.ID, treeID string) error {
 	// The only current use-case is a container removal, where all trees should be removed.
 	// Thus there is no need to replicate the operation on other node.
-	return s.forest.TreeDrop(cid, treeID)
+	return s.forest.TreeDrop(ctx, cid, treeID)
 }
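The mechanical core of the change is visible already in this first hunk: storage-facing signatures are widened to accept a context, so the span (and cancellation) started at the RPC layer reaches pilorama. A hypothetical before/after of that shape; these interfaces are illustrative only and are not the repository's actual Forest interface:

package tree

import (
    "context"

    cidSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
)

// Before: storage calls are opaque to tracing and cancellation.
type forestBefore interface {
    TreeDrop(cid cidSDK.ID, treeID string) error
}

// After: the caller's context, with its active span, flows in.
type forestAfter interface {
    TreeDrop(ctx context.Context, cid cidSDK.ID, treeID string) error
}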
@@ -1,6 +1,7 @@
 package tree
 
 import (
+	"context"
 	"errors"
 	"testing"
 
@@ -32,7 +33,7 @@ func TestGetSubTree(t *testing.T) {
 	meta := []pilorama.KeyValue{
 		{Key: pilorama.AttributeFilename, Value: []byte(path[len(path)-1])}}
 
-	lm, err := p.TreeAddByPath(d, treeID, pilorama.AttributeFilename, path[:len(path)-1], meta)
+	lm, err := p.TreeAddByPath(context.Background(), d, treeID, pilorama.AttributeFilename, path[:len(path)-1], meta)
 	require.NoError(t, err)
 	require.Equal(t, 1, len(lm))
 
@@ -41,7 +42,7 @@ func TestGetSubTree(t *testing.T) {
 
 	testGetSubTree := func(t *testing.T, rootID uint64, depth uint32, errIndex int) []uint64 {
 		acc := subTreeAcc{errIndex: errIndex}
-		err := getSubTree(&acc, d.CID, &GetSubTreeRequest_Body{
+		err := getSubTree(context.Background(), &acc, d.CID, &GetSubTreeRequest_Body{
 			TreeId: treeID,
 			RootId: rootID,
 			Depth:  depth,
@@ -68,7 +69,7 @@ func TestGetSubTree(t *testing.T) {
 	// GetSubTree must return valid meta.
 	for i := range acc.seen {
 		b := acc.seen[i].Body
-		meta, node, err := p.TreeGetMeta(d.CID, treeID, b.NodeId)
+		meta, node, err := p.TreeGetMeta(context.Background(), d.CID, treeID, b.NodeId)
 		require.NoError(t, err)
 		require.Equal(t, node, b.ParentId)
 		require.Equal(t, meta.Time, b.Timestamp)
@@ -5,8 +5,11 @@ import (
 	"context"
 	"errors"
 
+	"git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/pkg/tracing"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
 	netmapSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/netmap"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/trace"
 	"go.uber.org/zap"
 )
 
@@ -25,6 +28,12 @@ func (s *Service) forEachNode(ctx context.Context, cntNodes []netmapSDK.NodeInfo
 	for _, n := range cntNodes {
 		var stop bool
 		n.IterateNetworkEndpoints(func(endpoint string) bool {
+			ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.IterateNetworkEndpoints",
+				trace.WithAttributes(
+					attribute.String("endpoint", endpoint),
+				))
+			defer span.End()
+
 			c, err := s.cache.get(ctx, endpoint)
 			if err != nil {
 				return false
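In forEachNode the span is started inside the per-endpoint callback, so defer span.End() fires when each invocation of the closure returns: one short span per endpoint attempt, rather than one span stretched over the whole iteration. A self-contained sketch of that shape; the iterator and endpoint strings are made up, the real code goes through n.IterateNetworkEndpoints:

package main

import (
    "context"

    "go.opentelemetry.io/otel"
    "go.opentelemetry.io/otel/attribute"
    "go.opentelemetry.io/otel/trace"
)

// iterateEndpoints mimics the shape of n.IterateNetworkEndpoints:
// the callback returns true to stop iterating.
func iterateEndpoints(endpoints []string, f func(string) bool) {
    for _, e := range endpoints {
        if f(e) {
            return
        }
    }
}

func main() {
    ctx := context.Background()
    iterateEndpoints([]string{"10.0.0.1:8080", "10.0.0.2:8080"}, func(endpoint string) bool {
        _, span := otel.Tracer("treesvc").Start(ctx, "TreeService.IterateNetworkEndpoints",
            trace.WithAttributes(attribute.String("endpoint", endpoint)))
        defer span.End() // runs when this invocation of the closure returns

        // ... dial the endpoint here; returning false tries the next one ...
        return false
    })
}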
@@ -8,10 +8,13 @@ import (
 	"fmt"
 	"time"
 
+	"git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/pkg/tracing"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
 	cidSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	netmapSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/netmap"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/trace"
 	"go.uber.org/zap"
 )
 
@@ -38,17 +41,25 @@ const (
 	defaultReplicatorSendTimeout = time.Second * 5
 )
 
-func (s *Service) localReplicationWorker() {
+func (s *Service) localReplicationWorker(ctx context.Context) {
 	for {
 		select {
 		case <-s.closeCh:
 			return
 		case op := <-s.replicateLocalCh:
-			err := s.forest.TreeApply(op.cid, op.treeID, &op.Move, false)
+			ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.HandleReplicationOperation",
+				trace.WithAttributes(
+					attribute.String("tree_id", op.treeID),
+					attribute.String("container_id", op.cid.EncodeToString()),
+				),
+			)
+
+			err := s.forest.TreeApply(ctx, op.cid, op.treeID, &op.Move, false)
 			if err != nil {
 				s.log.Error(logs.TreeFailedToApplyReplicatedOperation,
 					zap.String("err", err.Error()))
 			}
+			span.End()
 		}
 	}
 }
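Worth noting in localReplicationWorker above: span.End() is called explicitly at the end of the case instead of being deferred. defer is function-scoped in Go, and this function is an infinite for/select loop, so a deferred End would never run while the worker lives. A sketch of that pattern with plain OpenTelemetry; channel and task names are invented:

package main

import (
    "context"

    "go.opentelemetry.io/otel"
    "go.opentelemetry.io/otel/attribute"
    "go.opentelemetry.io/otel/trace"
)

// process stands in for the real work, e.g. s.forest.TreeApply.
func process(task string) {}

func worker(ctx context.Context, closeCh <-chan struct{}, tasks <-chan string) {
    for { // the function only returns on shutdown, so defer is unusable here
        select {
        case <-closeCh:
            return
        case task := <-tasks:
            _, span := otel.Tracer("treesvc").Start(ctx, "TreeService.HandleReplicationOperation",
                trace.WithAttributes(attribute.String("task", task)))

            process(task)

            span.End() // explicit: exactly one span per handled task
        }
    }
}

func main() {
    closeCh := make(chan struct{})
    tasks := make(chan string, 1)
    tasks <- "op"
    go worker(context.Background(), closeCh, tasks)
    close(closeCh)
}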
@@ -59,10 +70,24 @@ func (s *Service) replicationWorker(ctx context.Context) {
 		case <-s.closeCh:
 			return
 		case task := <-s.replicationTasks:
+			ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.HandleReplicationTask",
+				trace.WithAttributes(
+					attribute.String("public_key", hex.EncodeToString(task.n.PublicKey())),
+				),
+			)
+
 			var lastErr error
 			var lastAddr string
 
 			task.n.IterateNetworkEndpoints(func(addr string) bool {
+				ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.HandleReplicationTaskOnEndpoint",
+					trace.WithAttributes(
+						attribute.String("public_key", hex.EncodeToString(task.n.PublicKey())),
+						attribute.String("address", addr),
+					),
+				)
+				defer span.End()
+
 				lastAddr = addr
 
 				c, err := s.cache.get(ctx, addr)
@@ -89,6 +114,7 @@ func (s *Service) replicationWorker(ctx context.Context) {
 						zap.String("key", hex.EncodeToString(task.n.PublicKey())))
 				}
 			}
+			span.End()
 		}
 	}
 }
@@ -96,7 +122,7 @@ func (s *Service) replicationWorker(ctx context.Context) {
 func (s *Service) replicateLoop(ctx context.Context) {
 	for i := 0; i < s.replicatorWorkerCount; i++ {
 		go s.replicationWorker(ctx)
-		go s.localReplicationWorker()
+		go s.localReplicationWorker(ctx)
 	}
 	defer func() {
 		for len(s.replicationTasks) != 0 {
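In replicationWorker the two spans nest: HandleReplicationTaskOnEndpoint is started from the ctx returned when HandleReplicationTask was started, so each endpoint attempt appears as a child of its task in the trace. A minimal sketch of that parent/child relationship; addresses are invented and plain OpenTelemetry is used in place of the repository's wrapper:

package main

import (
    "context"

    "go.opentelemetry.io/otel"
    "go.opentelemetry.io/otel/attribute"
    "go.opentelemetry.io/otel/trace"
)

func main() {
    tracer := otel.Tracer("treesvc")

    // Parent span for the whole replication task.
    ctx, taskSpan := tracer.Start(context.Background(), "TreeService.HandleReplicationTask")

    for _, addr := range []string{"10.0.0.1:8080", "10.0.0.2:8080"} { // made-up addresses
        // Child span: started from the parent's ctx, so it nests under it.
        _, epSpan := tracer.Start(ctx, "TreeService.HandleReplicationTaskOnEndpoint",
            trace.WithAttributes(attribute.String("address", addr)))
        // ... dial addr and send the operation here ...
        epSpan.End()
    }

    taskSpan.End()
}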
@@ -119,7 +119,7 @@ func (s *Service) Add(ctx context.Context, req *AddRequest) (*AddResponse, error
 	}
 
 	d := pilorama.CIDDescriptor{CID: cid, Position: pos, Size: len(ns)}
-	log, err := s.forest.TreeMove(d, b.GetTreeId(), &pilorama.Move{
+	log, err := s.forest.TreeMove(ctx, d, b.GetTreeId(), &pilorama.Move{
 		Parent: b.GetParentId(),
 		Child:  pilorama.RootID,
 		Meta:   pilorama.Meta{Items: protoToMeta(b.GetMeta())},
@@ -174,7 +174,7 @@ func (s *Service) AddByPath(ctx context.Context, req *AddByPathRequest) (*AddByP
 	}
 
 	d := pilorama.CIDDescriptor{CID: cid, Position: pos, Size: len(ns)}
-	logs, err := s.forest.TreeAddByPath(d, b.GetTreeId(), attr, b.GetPath(), meta)
+	logs, err := s.forest.TreeAddByPath(ctx, d, b.GetTreeId(), attr, b.GetPath(), meta)
 	if err != nil {
 		return nil, err
 	}
@@ -231,7 +231,7 @@ func (s *Service) Remove(ctx context.Context, req *RemoveRequest) (*RemoveRespon
 	}
 
 	d := pilorama.CIDDescriptor{CID: cid, Position: pos, Size: len(ns)}
-	log, err := s.forest.TreeMove(d, b.GetTreeId(), &pilorama.Move{
+	log, err := s.forest.TreeMove(ctx, d, b.GetTreeId(), &pilorama.Move{
 		Parent: pilorama.TrashID,
 		Child:  b.GetNodeId(),
 	})
@@ -280,7 +280,7 @@ func (s *Service) Move(ctx context.Context, req *MoveRequest) (*MoveResponse, er
 	}
 
 	d := pilorama.CIDDescriptor{CID: cid, Position: pos, Size: len(ns)}
-	log, err := s.forest.TreeMove(d, b.GetTreeId(), &pilorama.Move{
+	log, err := s.forest.TreeMove(ctx, d, b.GetTreeId(), &pilorama.Move{
 		Parent: b.GetParentId(),
 		Child:  b.GetNodeId(),
 		Meta:   pilorama.Meta{Items: protoToMeta(b.GetMeta())},
@@ -328,14 +328,14 @@ func (s *Service) GetNodeByPath(ctx context.Context, req *GetNodeByPathRequest)
 		attr = pilorama.AttributeFilename
 	}
 
-	nodes, err := s.forest.TreeGetByPath(cid, b.GetTreeId(), attr, b.GetPath(), b.GetLatestOnly())
+	nodes, err := s.forest.TreeGetByPath(ctx, cid, b.GetTreeId(), attr, b.GetPath(), b.GetLatestOnly())
 	if err != nil {
 		return nil, err
 	}
 
 	info := make([]*GetNodeByPathResponse_Info, 0, len(nodes))
 	for _, node := range nodes {
-		m, parent, err := s.forest.TreeGetMeta(cid, b.GetTreeId(), node)
+		m, parent, err := s.forest.TreeGetMeta(ctx, cid, b.GetTreeId(), node)
 		if err != nil {
 			return nil, err
 		}
@@ -406,10 +406,10 @@ func (s *Service) GetSubTree(req *GetSubTreeRequest, srv TreeService_GetSubTreeS
 		return nil
 	}
 
-	return getSubTree(srv, cid, b, s.forest)
+	return getSubTree(srv.Context(), srv, cid, b, s.forest)
 }
 
-func getSubTree(srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error {
+func getSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error {
 	// Traverse the tree in a DFS manner. Because we need to support arbitrary depth,
 	// recursive implementation is not suitable here, so we maintain explicit stack.
 	stack := [][]uint64{{b.GetRootId()}}
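Two things in this GetSubTree hunk: the server-streaming handler has no ctx parameter of its own, so the commit pulls one from the stream via srv.Context(); and the traversal the comment describes is a DFS over an explicit stack of per-level slices, which supports arbitrary depth without recursion. A runnable sketch of that stack discipline; the tree contents are invented, and printing stands in for streaming the node back to the client:

package main

import "fmt"

func main() {
    children := map[uint64][]uint64{ // hypothetical tree: node -> children
        0: {1, 2},
        1: {3},
    }

    stack := [][]uint64{{0}} // start from the root, like stack := [][]uint64{{b.GetRootId()}}
    for len(stack) > 0 {
        if len(stack[len(stack)-1]) == 0 { // this level is exhausted: pop it
            stack = stack[:len(stack)-1]
            continue
        }

        // Take the next node from the top level, exactly as in getSubTree.
        nodeID := stack[len(stack)-1][0]
        stack[len(stack)-1] = stack[len(stack)-1][1:]

        fmt.Println("visit", nodeID) // stands in for sending the node to srv

        if cs := children[nodeID]; len(cs) > 0 { // descend: push children as a new level
            stack = append(stack, cs)
        }
    }
}

The depth check in the real code (uint32(len(stack)) < b.GetDepth()) works because the stack height equals the current depth, so capping the stack caps the descent.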
@@ -425,7 +425,7 @@ func getSubTree(srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRe
 		nodeID := stack[len(stack)-1][0]
 		stack[len(stack)-1] = stack[len(stack)-1][1:]
 
-		m, p, err := forest.TreeGetMeta(cid, b.GetTreeId(), nodeID)
+		m, p, err := forest.TreeGetMeta(ctx, cid, b.GetTreeId(), nodeID)
 		if err != nil {
 			return err
 		}
@@ -442,7 +442,7 @@ func getSubTree(srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRe
 		}
 
 		if b.GetDepth() == 0 || uint32(len(stack)) < b.GetDepth() {
-			children, err := forest.TreeGetChildren(cid, b.GetTreeId(), nodeID)
+			children, err := forest.TreeGetChildren(ctx, cid, b.GetTreeId(), nodeID)
 			if err != nil {
 				return err
 			}
@@ -455,7 +455,7 @@ func getSubTree(srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRe
 }
 
 // Apply locally applies operation from the remote node to the tree.
-func (s *Service) Apply(_ context.Context, req *ApplyRequest) (*ApplyResponse, error) {
+func (s *Service) Apply(ctx context.Context, req *ApplyRequest) (*ApplyResponse, error) {
 	err := verifyMessage(req)
 	if err != nil {
 		return nil, err
@@ -468,7 +468,7 @@ func (s *Service) Apply(_ context.Context, req *ApplyRequest) (*ApplyResponse, e
 
 	key := req.GetSignature().GetKey()
 
-	_, pos, _, err := s.getContainerInfo(cid, key)
+	_, pos, _, err := s.getContainerInfo(ctx, cid, key)
 	if err != nil {
 		return nil, err
 	}
@@ -532,7 +532,7 @@ func (s *Service) GetOpLog(req *GetOpLogRequest, srv TreeService_GetOpLogServer)
 
 	h := b.GetHeight()
 	for {
-		lm, err := s.forest.TreeGetOpLog(cid, b.GetTreeId(), h)
+		lm, err := s.forest.TreeGetOpLog(srv.Context(), cid, b.GetTreeId(), h)
 		if err != nil || lm.Time == 0 {
 			return err
 		}
@@ -587,7 +587,7 @@ func (s *Service) TreeList(ctx context.Context, req *TreeListRequest) (*TreeList
 		return resp, outErr
 	}
 
-	ids, err := s.forest.TreeList(cid)
+	ids, err := s.forest.TreeList(ctx, cid)
 	if err != nil {
 		return nil, err
 	}
@@ -623,7 +623,7 @@ func metaToProto(arr []pilorama.KeyValue) []*KeyValue {
 
 // getContainerInfo returns the list of container nodes, position in the container for the node
 // with pub key and total amount of nodes in all replicas.
-func (s *Service) getContainerInfo(cid cidSDK.ID, pub []byte) ([]netmapSDK.NodeInfo, int, int, error) {
+func (s *Service) getContainerInfo(ctx context.Context, cid cidSDK.ID, pub []byte) ([]netmapSDK.NodeInfo, int, int, error) {
 	cntNodes, _, err := s.getContainerNodes(cid)
 	if err != nil {
 		return nil, 0, 0, err
@@ -85,7 +85,7 @@ func (s *Service) synchronizeAllTrees(ctx context.Context, cid cid.ID) error {
 	}
 
 	for _, tid := range treesToSync {
-		h, err := s.forest.TreeLastSyncHeight(cid, tid)
+		h, err := s.forest.TreeLastSyncHeight(ctx, cid, tid)
 		if err != nil && !errors.Is(err, pilorama.ErrTreeNotFound) {
 			s.log.Warn(logs.TreeCouldNotGetLastSynchronizedHeightForATree,
 				zap.Stringer("cid", cid),
@@ -94,7 +94,7 @@ func (s *Service) synchronizeAllTrees(ctx context.Context, cid cid.ID) error {
 		}
 		newHeight := s.synchronizeTree(ctx, cid, h, tid, nodes)
 		if h < newHeight {
-			if err := s.forest.TreeUpdateLastSyncHeight(cid, tid, newHeight); err != nil {
+			if err := s.forest.TreeUpdateLastSyncHeight(ctx, cid, tid, newHeight); err != nil {
 				s.log.Warn(logs.TreeCouldNotUpdateLastSynchronizedHeightForATree,
 					zap.Stringer("cid", cid),
 					zap.String("tree", tid))
@@ -232,7 +232,7 @@ func (s *Service) synchronizeSingle(ctx context.Context, cid cid.ID, treeID stri
 	if err := m.Meta.FromBytes(lm.Meta); err != nil {
 		return newHeight, err
 	}
-	if err := s.forest.TreeApply(cid, treeID, m, true); err != nil {
+	if err := s.forest.TreeApply(ctx, cid, treeID, m, true); err != nil {
 		return newHeight, err
 	}
 	if m.Time > newHeight {
@@ -284,11 +284,13 @@ func (s *Service) syncLoop(ctx context.Context) {
 		case <-ctx.Done():
 			return
 		case <-s.syncChan:
+			ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.sync")
 			s.log.Debug(logs.TreeSyncingTrees)
 
 			cnrs, err := s.cfg.cnrSource.List()
 			if err != nil {
 				s.log.Error(logs.TreeCouldNotFetchContainers, zap.Error(err))
+				span.End()
 				continue
 			}
 
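The same defer caveat applies in syncLoop: the loop never returns, so the sync span is ended explicitly on both exits of the case, including before the early continue on the container-listing error. A compact sketch of that two-exit shape; listContainers is a made-up stand-in for s.cfg.cnrSource.List():

package main

import (
    "context"
    "errors"

    "go.opentelemetry.io/otel"
)

// listContainers is a hypothetical stand-in for s.cfg.cnrSource.List().
func listContainers() error { return errors.New("unavailable") }

func syncLoop(ctx context.Context, syncChan <-chan struct{}) {
    tracer := otel.Tracer("treesvc")
    for {
        select {
        case <-ctx.Done():
            return
        case <-syncChan:
            _, span := tracer.Start(ctx, "TreeService.sync")

            if err := listContainers(); err != nil {
                span.End() // early exit: end the span before continue
                continue
            }

            // ... syncContainers / removeContainers would run here ...
            span.End() // normal exit
        }
    }
}

func main() {
    ctx, cancel := context.WithCancel(context.Background())
    ch := make(chan struct{}, 1)
    ch <- struct{}{}
    go syncLoop(ctx, ch)
    cancel()
}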
@@ -299,11 +301,15 @@ func (s *Service) syncLoop(ctx context.Context) {
 			s.removeContainers(ctx, newMap)
 
 			s.log.Debug(logs.TreeTreesHaveBeenSynchronized)
+			span.End()
 		}
 	}
 }
 
 func (s *Service) syncContainers(ctx context.Context, cnrs []cid.ID) {
+	ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.syncContainers")
+	defer span.End()
+
 	// sync new containers
 	var wg sync.WaitGroup
 	for _, cnr := range cnrs {
@@ -335,6 +341,9 @@ func (s *Service) syncContainers(ctx context.Context, cnrs []cid.ID) {
 }
 
 func (s *Service) removeContainers(ctx context.Context, newContainers map[cid.ID]struct{}) {
+	ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.removeContainers")
+	defer span.End()
+
 	s.cnrMapMtx.Lock()
 	defer s.cnrMapMtx.Unlock()
 