[#242] treesvc: Add tracing spans

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
Author: Dmitrii Stepanov, 2023-04-13 15:36:20 +03:00 (committed by fyrchik)
Parent: d62c6e4ce6
Commit: 6121b541b5

13 changed files with 601 additions and 195 deletions


@@ -7,8 +7,8 @@ import (
 )
 
 // DropTree drops a tree from the database. If treeID is empty, all the trees are dropped.
-func (s *Service) DropTree(_ context.Context, cid cid.ID, treeID string) error {
+func (s *Service) DropTree(ctx context.Context, cid cid.ID, treeID string) error {
 	// The only current use-case is a container removal, where all trees should be removed.
 	// Thus there is no need to replicate the operation on other node.
-	return s.forest.TreeDrop(cid, treeID)
+	return s.forest.TreeDrop(ctx, cid, treeID)
 }
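Note: this hunk is the template for the whole commit. Each pilorama.Forest method gains a leading context.Context, so spans opened at the RPC layer (and cancellation) reach the storage engine instead of being discarded with an underscore. A minimal sketch of the pattern, with the interface trimmed to one method and the container ID simplified to a string for illustration:

package treesvc

import "context"

// forest is a trimmed-down stand-in for pilorama.Forest: after this
// commit the context travels as the first argument of every storage call.
type forest interface {
	TreeDrop(ctx context.Context, cnr, treeID string) error
}

type service struct{ forest forest }

// DropTree forwards the caller's context instead of dropping it, so
// tracing metadata and deadlines propagate into the storage layer.
func (s *service) DropTree(ctx context.Context, cnr, treeID string) error {
	return s.forest.TreeDrop(ctx, cnr, treeID)
}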


@@ -1,6 +1,7 @@
 package tree
 
 import (
+	"context"
 	"errors"
 	"testing"
@@ -32,7 +33,7 @@ func TestGetSubTree(t *testing.T) {
 	meta := []pilorama.KeyValue{
 		{Key: pilorama.AttributeFilename, Value: []byte(path[len(path)-1])}}
-	lm, err := p.TreeAddByPath(d, treeID, pilorama.AttributeFilename, path[:len(path)-1], meta)
+	lm, err := p.TreeAddByPath(context.Background(), d, treeID, pilorama.AttributeFilename, path[:len(path)-1], meta)
 	require.NoError(t, err)
 	require.Equal(t, 1, len(lm))
@@ -41,7 +42,7 @@ func TestGetSubTree(t *testing.T) {
 	testGetSubTree := func(t *testing.T, rootID uint64, depth uint32, errIndex int) []uint64 {
 		acc := subTreeAcc{errIndex: errIndex}
-		err := getSubTree(&acc, d.CID, &GetSubTreeRequest_Body{
+		err := getSubTree(context.Background(), &acc, d.CID, &GetSubTreeRequest_Body{
 			TreeId: treeID,
 			RootId: rootID,
 			Depth:  depth,
@@ -68,7 +69,7 @@ func TestGetSubTree(t *testing.T) {
 	// GetSubTree must return valid meta.
 	for i := range acc.seen {
 		b := acc.seen[i].Body
-		meta, node, err := p.TreeGetMeta(d.CID, treeID, b.NodeId)
+		meta, node, err := p.TreeGetMeta(context.Background(), d.CID, treeID, b.NodeId)
 		require.NoError(t, err)
 		require.Equal(t, node, b.ParentId)
 		require.Equal(t, meta.Time, b.Timestamp)
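The test updates are mechanical: context.Background() satisfies the new first parameter. A bounded context is a drop-in alternative when a test should fail fast instead of hanging on a stuck storage call; a small sketch with a hypothetical test name:

package tree

import (
	"context"
	"testing"
	"time"
)

func TestSubTreeWithDeadline(t *testing.T) {
	// context.Background() is enough for unit tests; the timeout only
	// guards against a call that never returns.
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	_ = ctx // pass as the first argument to TreeAddByPath, TreeGetMeta, etc.
}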


@@ -5,8 +5,11 @@ import (
 	"context"
 	"errors"
 
+	"git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/pkg/tracing"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
 	netmapSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/netmap"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/trace"
 	"go.uber.org/zap"
 )
@@ -25,6 +28,12 @@ func (s *Service) forEachNode(ctx context.Context, cntNodes []netmapSDK.NodeInfo
 	for _, n := range cntNodes {
 		var stop bool
 		n.IterateNetworkEndpoints(func(endpoint string) bool {
+			ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.IterateNetworkEndpoints",
+				trace.WithAttributes(
+					attribute.String("endpoint", endpoint),
+				))
+			defer span.End()
+
 			c, err := s.cache.get(ctx, endpoint)
 			if err != nil {
 				return false
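tracing.StartSpanFromContext here is the frostfs-api-go helper; with the stock OpenTelemetry API the same per-endpoint span looks roughly as below (the tracer name is an arbitrary choice for the sketch). Because defer span.End() lives inside the callback, each endpoint attempt produces its own finished span rather than one span for the whole iteration:

package tree

import (
	"context"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/trace"
)

// tryEndpoint wraps a single connection attempt in a span that records
// the endpoint address; it returns true to stop further iteration.
func tryEndpoint(ctx context.Context, endpoint string, dial func(context.Context, string) error) bool {
	ctx, span := otel.Tracer("treesvc").Start(ctx, "TreeService.IterateNetworkEndpoints",
		trace.WithAttributes(attribute.String("endpoint", endpoint)))
	defer span.End() // closes this attempt's span when the callback returns

	return dial(ctx, endpoint) == nil
}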


@@ -8,10 +8,13 @@ import (
 	"fmt"
 	"time"
 
+	"git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/pkg/tracing"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
 	cidSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	netmapSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/netmap"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/trace"
 	"go.uber.org/zap"
 )
@@ -38,17 +41,25 @@ const (
 	defaultReplicatorSendTimeout = time.Second * 5
 )
 
-func (s *Service) localReplicationWorker() {
+func (s *Service) localReplicationWorker(ctx context.Context) {
 	for {
 		select {
 		case <-s.closeCh:
 			return
 		case op := <-s.replicateLocalCh:
-			err := s.forest.TreeApply(op.cid, op.treeID, &op.Move, false)
+			ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.HandleReplicationOperation",
+				trace.WithAttributes(
+					attribute.String("tree_id", op.treeID),
+					attribute.String("container_id", op.cid.EncodeToString()),
+				),
+			)
+
+			err := s.forest.TreeApply(ctx, op.cid, op.treeID, &op.Move, false)
 			if err != nil {
 				s.log.Error(logs.TreeFailedToApplyReplicatedOperation,
 					zap.String("err", err.Error()))
 			}
+			span.End()
 		}
 	}
 }
@@ -59,10 +70,24 @@ func (s *Service) replicationWorker(ctx context.Context) {
 		case <-s.closeCh:
 			return
 		case task := <-s.replicationTasks:
+			ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.HandleReplicationTask",
+				trace.WithAttributes(
+					attribute.String("public_key", hex.EncodeToString(task.n.PublicKey())),
+				),
+			)
+
 			var lastErr error
 			var lastAddr string
 
 			task.n.IterateNetworkEndpoints(func(addr string) bool {
+				ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.HandleReplicationTaskOnEndpoint",
+					trace.WithAttributes(
+						attribute.String("public_key", hex.EncodeToString(task.n.PublicKey())),
+						attribute.String("address", addr),
+					),
+				)
+				defer span.End()
+
 				lastAddr = addr
 
 				c, err := s.cache.get(ctx, addr)
@@ -89,6 +114,7 @@ func (s *Service) replicationWorker(ctx context.Context) {
 					zap.String("key", hex.EncodeToString(task.n.PublicKey())))
 			}
 		}
+		span.End()
 	}
 }
}
@@ -96,7 +122,7 @@ func (s *Service) replicationWorker(ctx context.Context) {
 func (s *Service) replicateLoop(ctx context.Context) {
 	for i := 0; i < s.replicatorWorkerCount; i++ {
 		go s.replicationWorker(ctx)
-		go s.localReplicationWorker()
+		go s.localReplicationWorker(ctx)
 	}
 	defer func() {
 		for len(s.replicationTasks) != 0 {
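Unlike the request handlers, these workers cannot defer span.End(): the surrounding for/select loop only exits when the goroutine dies, so a deferred call would never run per iteration. Hence the explicit span.End() at the end of each case body (and, in syncLoop further down, on the early continue path). The shape of the pattern, with otel.Tracer standing in for the frostfs tracing helper:

package tree

import (
	"context"

	"go.opentelemetry.io/otel"
)

// worker opens one span per loop iteration: every path that leaves the
// case body must end the span explicitly, since defer would fire only
// at goroutine exit.
func worker(ctx context.Context, done <-chan struct{}, ops <-chan func(context.Context) error) {
	for {
		select {
		case <-done:
			return
		case op := <-ops:
			opCtx, span := otel.Tracer("treesvc").Start(ctx, "TreeService.HandleReplicationOperation")
			if err := op(opCtx); err != nil {
				span.RecordError(err) // attach the failure before closing
			}
			span.End()
		}
	}
}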


@@ -119,7 +119,7 @@ func (s *Service) Add(ctx context.Context, req *AddRequest) (*AddResponse, error) {
 	}
 
 	d := pilorama.CIDDescriptor{CID: cid, Position: pos, Size: len(ns)}
-	log, err := s.forest.TreeMove(d, b.GetTreeId(), &pilorama.Move{
+	log, err := s.forest.TreeMove(ctx, d, b.GetTreeId(), &pilorama.Move{
 		Parent: b.GetParentId(),
 		Child:  pilorama.RootID,
 		Meta:   pilorama.Meta{Items: protoToMeta(b.GetMeta())},
@@ -174,7 +174,7 @@ func (s *Service) AddByPath(ctx context.Context, req *AddByPathRequest) (*AddByPathResponse, error) {
 	}
 
 	d := pilorama.CIDDescriptor{CID: cid, Position: pos, Size: len(ns)}
-	logs, err := s.forest.TreeAddByPath(d, b.GetTreeId(), attr, b.GetPath(), meta)
+	logs, err := s.forest.TreeAddByPath(ctx, d, b.GetTreeId(), attr, b.GetPath(), meta)
 	if err != nil {
 		return nil, err
 	}
@@ -231,7 +231,7 @@ func (s *Service) Remove(ctx context.Context, req *RemoveRequest) (*RemoveResponse, error) {
 	}
 
 	d := pilorama.CIDDescriptor{CID: cid, Position: pos, Size: len(ns)}
-	log, err := s.forest.TreeMove(d, b.GetTreeId(), &pilorama.Move{
+	log, err := s.forest.TreeMove(ctx, d, b.GetTreeId(), &pilorama.Move{
 		Parent: pilorama.TrashID,
 		Child:  b.GetNodeId(),
 	})
@@ -280,7 +280,7 @@ func (s *Service) Move(ctx context.Context, req *MoveRequest) (*MoveResponse, error) {
 	}
 
 	d := pilorama.CIDDescriptor{CID: cid, Position: pos, Size: len(ns)}
-	log, err := s.forest.TreeMove(d, b.GetTreeId(), &pilorama.Move{
+	log, err := s.forest.TreeMove(ctx, d, b.GetTreeId(), &pilorama.Move{
 		Parent: b.GetParentId(),
 		Child:  b.GetNodeId(),
 		Meta:   pilorama.Meta{Items: protoToMeta(b.GetMeta())},
@@ -328,14 +328,14 @@ func (s *Service) GetNodeByPath(ctx context.Context, req *GetNodeByPathRequest) (*GetNodeByPathResponse, error) {
 		attr = pilorama.AttributeFilename
 	}
 
-	nodes, err := s.forest.TreeGetByPath(cid, b.GetTreeId(), attr, b.GetPath(), b.GetLatestOnly())
+	nodes, err := s.forest.TreeGetByPath(ctx, cid, b.GetTreeId(), attr, b.GetPath(), b.GetLatestOnly())
 	if err != nil {
 		return nil, err
 	}
 
 	info := make([]*GetNodeByPathResponse_Info, 0, len(nodes))
 	for _, node := range nodes {
-		m, parent, err := s.forest.TreeGetMeta(cid, b.GetTreeId(), node)
+		m, parent, err := s.forest.TreeGetMeta(ctx, cid, b.GetTreeId(), node)
 		if err != nil {
 			return nil, err
 		}
@@ -406,10 +406,10 @@ func (s *Service) GetSubTree(req *GetSubTreeRequest, srv TreeService_GetSubTreeServer) error {
 		return nil
 	}
 
-	return getSubTree(srv, cid, b, s.forest)
+	return getSubTree(srv.Context(), srv, cid, b, s.forest)
 }
 
-func getSubTree(srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error {
+func getSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error {
 	// Traverse the tree in a DFS manner. Because we need to support arbitrary depth,
 	// recursive implementation is not suitable here, so we maintain explicit stack.
 	stack := [][]uint64{{b.GetRootId()}}
@@ -425,7 +425,7 @@ func getSubTree(srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error {
 		nodeID := stack[len(stack)-1][0]
 		stack[len(stack)-1] = stack[len(stack)-1][1:]
 
-		m, p, err := forest.TreeGetMeta(cid, b.GetTreeId(), nodeID)
+		m, p, err := forest.TreeGetMeta(ctx, cid, b.GetTreeId(), nodeID)
 		if err != nil {
 			return err
 		}
@@ -442,7 +442,7 @@ func getSubTree(srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error {
 		}
 
 		if b.GetDepth() == 0 || uint32(len(stack)) < b.GetDepth() {
-			children, err := forest.TreeGetChildren(cid, b.GetTreeId(), nodeID)
+			children, err := forest.TreeGetChildren(ctx, cid, b.GetTreeId(), nodeID)
 			if err != nil {
 				return err
 			}
@@ -455,7 +455,7 @@ func getSubTree(srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error {
 }
 
 // Apply locally applies operation from the remote node to the tree.
-func (s *Service) Apply(_ context.Context, req *ApplyRequest) (*ApplyResponse, error) {
+func (s *Service) Apply(ctx context.Context, req *ApplyRequest) (*ApplyResponse, error) {
 	err := verifyMessage(req)
 	if err != nil {
 		return nil, err
@@ -468,7 +468,7 @@ func (s *Service) Apply(_ context.Context, req *ApplyRequest) (*ApplyResponse, error) {
 	key := req.GetSignature().GetKey()
 
-	_, pos, _, err := s.getContainerInfo(cid, key)
+	_, pos, _, err := s.getContainerInfo(ctx, cid, key)
 	if err != nil {
 		return nil, err
 	}
@@ -532,7 +532,7 @@ func (s *Service) GetOpLog(req *GetOpLogRequest, srv TreeService_GetOpLogServer) error {
 	h := b.GetHeight()
 	for {
-		lm, err := s.forest.TreeGetOpLog(cid, b.GetTreeId(), h)
+		lm, err := s.forest.TreeGetOpLog(srv.Context(), cid, b.GetTreeId(), h)
 		if err != nil || lm.Time == 0 {
 			return err
 		}
@@ -587,7 +587,7 @@ func (s *Service) TreeList(ctx context.Context, req *TreeListRequest) (*TreeListResponse, error) {
 		return resp, outErr
 	}
 
-	ids, err := s.forest.TreeList(cid)
+	ids, err := s.forest.TreeList(ctx, cid)
 	if err != nil {
 		return nil, err
 	}
@@ -623,7 +623,7 @@ func metaToProto(arr []pilorama.KeyValue) []*KeyValue {
 // getContainerInfo returns the list of container nodes, position in the container for the node
 // with pub key and total amount of nodes in all replicas.
-func (s *Service) getContainerInfo(cid cidSDK.ID, pub []byte) ([]netmapSDK.NodeInfo, int, int, error) {
+func (s *Service) getContainerInfo(ctx context.Context, cid cidSDK.ID, pub []byte) ([]netmapSDK.NodeInfo, int, int, error) {
 	cntNodes, _, err := s.getContainerNodes(cid)
 	if err != nil {
 		return nil, 0, 0, err

@@ -85,7 +85,7 @@ func (s *Service) synchronizeAllTrees(ctx context.Context, cid cid.ID) error {
 	}
 
 	for _, tid := range treesToSync {
-		h, err := s.forest.TreeLastSyncHeight(cid, tid)
+		h, err := s.forest.TreeLastSyncHeight(ctx, cid, tid)
 		if err != nil && !errors.Is(err, pilorama.ErrTreeNotFound) {
 			s.log.Warn(logs.TreeCouldNotGetLastSynchronizedHeightForATree,
 				zap.Stringer("cid", cid),
@@ -94,7 +94,7 @@ func (s *Service) synchronizeAllTrees(ctx context.Context, cid cid.ID) error {
 		}
 		newHeight := s.synchronizeTree(ctx, cid, h, tid, nodes)
 		if h < newHeight {
-			if err := s.forest.TreeUpdateLastSyncHeight(cid, tid, newHeight); err != nil {
+			if err := s.forest.TreeUpdateLastSyncHeight(ctx, cid, tid, newHeight); err != nil {
 				s.log.Warn(logs.TreeCouldNotUpdateLastSynchronizedHeightForATree,
 					zap.Stringer("cid", cid),
 					zap.String("tree", tid))
@@ -232,7 +232,7 @@ func (s *Service) synchronizeSingle(ctx context.Context, cid cid.ID, treeID stri
 		if err := m.Meta.FromBytes(lm.Meta); err != nil {
 			return newHeight, err
 		}
-		if err := s.forest.TreeApply(cid, treeID, m, true); err != nil {
+		if err := s.forest.TreeApply(ctx, cid, treeID, m, true); err != nil {
 			return newHeight, err
 		}
 		if m.Time > newHeight {
@@ -284,11 +284,13 @@ func (s *Service) syncLoop(ctx context.Context) {
 		case <-ctx.Done():
 			return
 		case <-s.syncChan:
+			ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.sync")
 			s.log.Debug(logs.TreeSyncingTrees)
 
 			cnrs, err := s.cfg.cnrSource.List()
 			if err != nil {
 				s.log.Error(logs.TreeCouldNotFetchContainers, zap.Error(err))
+				span.End()
 				continue
 			}
@@ -299,11 +301,15 @@ func (s *Service) syncLoop(ctx context.Context) {
 			s.removeContainers(ctx, newMap)
 
 			s.log.Debug(logs.TreeTreesHaveBeenSynchronized)
+			span.End()
 		}
 	}
 }
 
 func (s *Service) syncContainers(ctx context.Context, cnrs []cid.ID) {
+	ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.syncContainers")
+	defer span.End()
+
 	// sync new containers
 	var wg sync.WaitGroup
 	for _, cnr := range cnrs {
@@ -335,6 +341,9 @@ func (s *Service) syncContainers(ctx context.Context, cnrs []cid.ID) {
 }
 
 func (s *Service) removeContainers(ctx context.Context, newContainers map[cid.ID]struct{}) {
+	ctx, span := tracing.StartSpanFromContext(ctx, "TreeService.removeContainers")
+	defer span.End()
+
 	s.cnrMapMtx.Lock()
 	defer s.cnrMapMtx.Unlock()
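syncContainers and removeContainers use the simplest form of the pattern: open the span on entry, rebind ctx so everything called below nests under it, and defer the End so all return paths are covered. A sketch of that shape, again with otel.Tracer standing in for the frostfs tracing helper:

package tree

import (
	"context"

	"go.opentelemetry.io/otel"
)

// instrumented shows the function-scoped span used by syncContainers
// and removeContainers: rebinding ctx makes downstream spans children
// of this one, and defer covers every return path.
func instrumented(ctx context.Context, work func(context.Context)) {
	ctx, span := otel.Tracer("treesvc").Start(ctx, "TreeService.removeContainers")
	defer span.End()

	work(ctx)
}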