Merge pull request #3691 from nspcc-dev/uploader-eq-search

upload-bin, NeoFSBlockFetcher: migrate to SEARCH with strict equality comparator
Anna Shaleva 2024-11-25 13:43:12 +03:00 committed by GitHub
commit 171e01be3c
3 changed files with 56 additions and 42 deletions


@@ -32,8 +32,13 @@ import (
 )

 const (
-	// Number of objects to search in a batch for finding max block in container.
-	searchBatchSize = 10000
+	// Number of objects to upload in a batch. All batches of uploadBatchSize size
+	// except the most recent one are guaranteed to be completed and don't contain gaps.
+	uploadBatchSize = 10000
+	// Number of objects to search in a batch. If it is larger than uploadBatchSize,
+	// it may lead to many duplicate uploads. We need to search with EQ filter to
+	// avoid partially-completed SEARCH responses.
+	searchBatchSize = 1
 	// Size of object ID.
 	oidSize = sha256.Size
 )
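The new constants encode the core trade-off of this change: uploads still go in batches of 10000, but each SEARCH query now covers exactly one index so the strict-equality comparator can be used. The degenerate-range check that recurs throughout this commit looks like the minimal standalone sketch below; `blockFilters` is an illustrative helper, not part of the commit, but it uses the same neofs-sdk-go calls as the diff:

```go
package main

import (
	"fmt"

	"github.com/nspcc-dev/neofs-sdk-go/object"
)

// blockFilters builds SEARCH filters for the half-open index range
// [startIndex, endIndex). With searchBatchSize = 1 every range collapses
// to a single index, so only the strict-equality branch is ever taken.
func blockFilters(attributeKey string, startIndex, endIndex uint) object.SearchFilters {
	filters := object.NewSearchFilters()
	if endIndex == startIndex+1 {
		// Single index: EQ avoids partially-completed SEARCH responses.
		filters.AddFilter(attributeKey, fmt.Sprintf("%d", startIndex), object.MatchStringEqual)
	} else {
		// Wider range: fall back to the GE/LT pair.
		filters.AddFilter(attributeKey, fmt.Sprintf("%d", startIndex), object.MatchNumGE)
		filters.AddFilter(attributeKey, fmt.Sprintf("%d", endIndex), object.MatchNumLT)
	}
	return filters
}

func main() {
	_ = blockFilters("BlockIndex", 42, 43) // EQ-only filter set
}
```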
@@ -138,7 +143,7 @@ func uploadBin(ctx *cli.Context) error {
 	oldestMissingBlockIndex, errBlock := fetchLatestMissingBlockIndex(ctx.Context, p, containerID, acc.PrivateKey(), attr, int(currentBlockHeight), maxParallelSearches, maxRetries)
 	if errBlock != nil {
-		return cli.Exit(fmt.Errorf("failed to fetch the oldest missing block index from container: %w", err), 1)
+		return cli.Exit(fmt.Errorf("failed to fetch the oldest missing block index from container: %w", errBlock), 1)
 	}
 	fmt.Fprintln(ctx.App.Writer, "First block of latest incomplete batch uploaded to NeoFS container:", oldestMissingBlockIndex)
@@ -210,8 +215,12 @@ func fetchLatestMissingBlockIndex(ctx context.Context, p *pool.Pool, containerID
 			prm := client.PrmObjectSearch{}
 			filters := object.NewSearchFilters()
+			if endIndex == startIndex+1 {
+				filters.AddFilter(attributeKey, fmt.Sprintf("%d", startIndex), object.MatchStringEqual)
+			} else {
 			filters.AddFilter(attributeKey, fmt.Sprintf("%d", startIndex), object.MatchNumGE)
 			filters.AddFilter(attributeKey, fmt.Sprintf("%d", endIndex), object.MatchNumLT)
+			}
 			prm.SetFilters(filters)
 			var (
 				objectIDs []oid.ID
@@ -235,7 +244,7 @@ func fetchLatestMissingBlockIndex(ctx context.Context, p *pool.Pool, containerID
 				continue
 			}
 			if emptyBatchFound || (batch == numBatches && i == len(results)-1) {
-				return results[i].startIndex, nil
+				return results[i].startIndex / uploadBatchSize * uploadBatchSize, nil
 			}
 		}
 	}
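The rounding added in the hunk above relies on integer division flooring: dividing the first missing index by `uploadBatchSize` and multiplying back snaps it down to its batch boundary, so uploads always restart at the beginning of the (possibly incomplete) batch. A quick self-contained check:

```go
package main

import "fmt"

const uploadBatchSize = 10000 // as defined in this commit

func main() {
	// Integer division floors, so idx/uploadBatchSize*uploadBatchSize snaps
	// idx down to the first index of its batch.
	for _, idx := range []int{0, 9999, 10000, 123456} {
		fmt.Printf("%d -> %d\n", idx, idx/uploadBatchSize*uploadBatchSize)
	}
	// Output:
	// 0 -> 0
	// 9999 -> 0
	// 10000 -> 10000
	// 123456 -> 120000
}
```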
@@ -248,9 +257,9 @@ func uploadBlocks(ctx *cli.Context, p *pool.Pool, rpc *rpcclient.Client, signer
 		fmt.Fprintf(ctx.App.Writer, "No new blocks to upload. Need to upload starting from %d, current height %d\n", oldestMissingBlockIndex, currentBlockHeight)
 		return nil
 	}
-	for batchStart := oldestMissingBlockIndex; batchStart <= int(currentBlockHeight); batchStart += searchBatchSize {
+	for batchStart := oldestMissingBlockIndex; batchStart <= int(currentBlockHeight); batchStart += uploadBatchSize {
 		var (
-			batchEnd = min(batchStart+searchBatchSize, int(currentBlockHeight)+1)
+			batchEnd = min(batchStart+uploadBatchSize, int(currentBlockHeight)+1)
 			errCh    = make(chan error)
 			doneCh   = make(chan struct{})
 			wg       sync.WaitGroup
@@ -335,41 +344,35 @@ func uploadBlocks(ctx *cli.Context, p *pool.Pool, rpc *rpcclient.Client, signer
 // uploadIndexFiles uploads missing index files to the container.
 func uploadIndexFiles(ctx *cli.Context, p *pool.Pool, containerID cid.ID, account *wallet.Account, signer user.Signer, oldestMissingBlockIndex uint, blockAttributeKey string, homomorphicHashingDisabled bool, maxParallelSearches, maxRetries int, debug bool) error {
-	attributeKey := ctx.String("index-attribute")
-	indexFileSize := ctx.Uint("index-file-size")
-	fmt.Fprintln(ctx.App.Writer, "Uploading index files...")
-
-	prm := client.PrmObjectSearch{}
-	filters := object.NewSearchFilters()
-	filters.AddFilter(attributeKey, fmt.Sprintf("%d", 0), object.MatchNumGE)
-	filters.AddFilter("IndexSize", fmt.Sprintf("%d", indexFileSize), object.MatchStringEqual)
-	prm.SetFilters(filters)
-	var objectIDs []oid.ID
-	errSearch := retry(func() error {
-		var errSearchIndex error
-		objectIDs, errSearchIndex = neofs.ObjectSearch(ctx.Context, p, account.PrivateKey(), containerID.String(), prm)
-		return errSearchIndex
-	}, maxRetries)
-	if errSearch != nil {
-		return fmt.Errorf("index files search failed: %w", errSearch)
-	}
-
-	existingIndexCount := uint(len(objectIDs))
-	expectedIndexCount := (oldestMissingBlockIndex - 1) / indexFileSize
-	if existingIndexCount >= expectedIndexCount {
-		fmt.Fprintf(ctx.App.Writer, "Index files are up to date. Existing: %d, expected: %d\n", existingIndexCount, expectedIndexCount)
-		return nil
-	}
-	fmt.Fprintf(ctx.App.Writer, "Current index files count: %d, expected: %d\n", existingIndexCount, expectedIndexCount)
-
 	var (
+		attributeKey       = ctx.String("index-attribute")
+		indexFileSize      = ctx.Uint("index-file-size")
 		buffer             = make([]byte, indexFileSize*oidSize)
 		doneCh             = make(chan struct{})
 		errCh              = make(chan error)
 		emptyOid           = make([]byte, oidSize)
+		expectedIndexCount = (oldestMissingBlockIndex - 1) / indexFileSize
+		existingIndexCount = uint(0)
+		filters            = object.NewSearchFilters()
 	)
+	fmt.Fprintln(ctx.App.Writer, "Uploading index files...")

 	go func() {
 		defer close(doneCh)
+		// Search for existing index files.
+		filters.AddFilter("IndexSize", fmt.Sprintf("%d", indexFileSize), object.MatchStringEqual)
+		indexIDs := searchObjects(ctx.Context, p, containerID, account, attributeKey, 0, expectedIndexCount, maxParallelSearches, maxRetries, errCh, filters)
+		for range indexIDs {
+			existingIndexCount++
+		}
+		if existingIndexCount >= expectedIndexCount {
+			fmt.Fprintf(ctx.App.Writer, "Index files are up to date. Existing: %d, expected: %d\n", existingIndexCount, expectedIndexCount)
+			return
+		}
+		fmt.Fprintf(ctx.App.Writer, "Current index files count: %d, expected: %d\n", existingIndexCount, expectedIndexCount)

 		// Main processing loop for each index file.
 		for i := existingIndexCount; i < expectedIndexCount; i++ {
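Note how the rewritten `uploadIndexFiles` no longer calls `neofs.ObjectSearch` synchronously: the existing-index count is now obtained by draining the OID channel returned by `searchObjects` inside the goroutine. A minimal sketch of that drain-and-count pattern (the producer and names here are illustrative, not the real plumbing):

```go
package main

import "fmt"

func main() {
	// Stand-in for the channel returned by searchObjects; the producer
	// closes it once the OID search is finished.
	ids := make(chan int, 8)
	go func() {
		defer close(ids)
		for i := 0; i < 5; i++ {
			ids <- i
		}
	}()

	var existingIndexCount uint
	for range ids { // drains until the producer closes the channel
		existingIndexCount++
	}
	fmt.Println("existing index files:", existingIndexCount) // 5
}
```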
@@ -502,7 +505,7 @@ func uploadIndexFiles(ctx *cli.Context, p *pool.Pool, containerID cid.ID, accoun
 // searchObjects searches in parallel for objects with attribute GE startIndex and LT
 // endIndex. It returns a buffered channel of resulting object IDs and closes it once
 // OID search is finished. Errors are sent to errCh in a non-blocking way.
-func searchObjects(ctx context.Context, p *pool.Pool, containerID cid.ID, account *wallet.Account, blockAttributeKey string, startIndex, endIndex uint, maxParallelSearches, maxRetries int, errCh chan error) chan oid.ID {
+func searchObjects(ctx context.Context, p *pool.Pool, containerID cid.ID, account *wallet.Account, blockAttributeKey string, startIndex, endIndex uint, maxParallelSearches, maxRetries int, errCh chan error, additionalFilters ...object.SearchFilters) chan oid.ID {
 	var res = make(chan oid.ID, 2*searchBatchSize)
 	go func() {
 		var wg sync.WaitGroup
@@ -526,8 +529,15 @@ func searchObjects(ctx context.Context, p *pool.Pool, containerID cid.ID, accoun
 			prm := client.PrmObjectSearch{}
 			filters := object.NewSearchFilters()
+			if len(additionalFilters) != 0 {
+				filters = additionalFilters[0]
+			}
+			if end == start+1 {
+				filters.AddFilter(blockAttributeKey, fmt.Sprintf("%d", start), object.MatchStringEqual)
+			} else {
 			filters.AddFilter(blockAttributeKey, fmt.Sprintf("%d", start), object.MatchNumGE)
 			filters.AddFilter(blockAttributeKey, fmt.Sprintf("%d", end), object.MatchNumLT)
+			}
 			prm.SetFilters(filters)
 			var objIDs []oid.ID
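The variadic `additionalFilters` parameter is Go's usual optional-argument idiom: the one caller that needs a pre-seeded filter set (`uploadIndexFiles` with its `IndexSize` EQ filter) passes exactly one, every other caller passes none. A condensed sketch of the pattern, with `buildFilters` as an illustrative stand-in for the in-function logic:

```go
package main

import (
	"fmt"

	"github.com/nspcc-dev/neofs-sdk-go/object"
)

// buildFilters mimics the new searchObjects behavior: zero or one
// pre-built filter sets may be passed; absent one, a fresh set is
// created and attribute filters are appended on top.
func buildFilters(additionalFilters ...object.SearchFilters) object.SearchFilters {
	filters := object.NewSearchFilters()
	if len(additionalFilters) != 0 {
		filters = additionalFilters[0]
	}
	return filters
}

func main() {
	// uploadIndexFiles-style caller: seed the set with an IndexSize EQ filter.
	seeded := object.NewSearchFilters()
	seeded.AddFilter("IndexSize", fmt.Sprintf("%d", 128), object.MatchStringEqual)
	_ = buildFilters(seeded)

	// Block-search callers simply pass nothing.
	_ = buildFilters()
}
```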


@@ -47,8 +47,7 @@ parameter.
 Depending on the mode, the service either:
 - Searches for index files by index file attribute and reads block OIDs from index
   file object-by-object.
-- Searches batches of blocks directly by block attribute (the batch size is
-  configured via `OIDBatchSize` parameter).
+- Searches blocks one by one directly by block attribute.

 Once the OIDs are retrieved, they are immediately redirected to the
 block downloading routines for further processing. The channel that


@@ -342,7 +342,8 @@ func (bfs *Service) streamBlockOIDs(rc io.ReadCloser, skip int) error {
 // fetchOIDsBySearch fetches block OIDs from NeoFS by searching through the Block objects.
 func (bfs *Service) fetchOIDsBySearch() error {
 	startIndex := bfs.chain.BlockHeight()
-	batchSize := uint32(bfs.cfg.OIDBatchSize)
+	// We need to search with EQ filter to avoid partially-completed SEARCH responses.
+	batchSize := uint32(1)
 	for {
 		select {
@@ -351,8 +352,12 @@ func (bfs *Service) fetchOIDsBySearch() error {
 		default:
 			prm := client.PrmObjectSearch{}
 			filters := object.NewSearchFilters()
+			if startIndex == startIndex+batchSize-1 {
+				filters.AddFilter(bfs.cfg.BlockAttribute, fmt.Sprintf("%d", startIndex), object.MatchStringEqual)
+			} else {
 			filters.AddFilter(bfs.cfg.BlockAttribute, fmt.Sprintf("%d", startIndex), object.MatchNumGE)
 			filters.AddFilter(bfs.cfg.BlockAttribute, fmt.Sprintf("%d", startIndex+batchSize-1), object.MatchNumLE)
+			}
 			prm.SetFilters(filters)
 			ctx, cancel := context.WithTimeout(bfs.ctx, bfs.cfg.Timeout)
 			blockOids, err := bfs.objectSearch(ctx, prm)
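With `batchSize` pinned to 1, the `startIndex == startIndex+batchSize-1` condition is always true, so the fetcher issues one strict-EQ SEARCH per block index and advances by one on each hit. A simplified, self-contained sketch of that loop shape; `searchOne` is a stub for the real NeoFS round-trip, and the real service retries and waits for new blocks rather than stopping on an empty result:

```go
package main

import "fmt"

// searchOne stands in for one strict-EQ SEARCH round-trip; here blocks
// 0..2 "exist".
func searchOne(index uint32) bool { return index < 3 }

func main() {
	startIndex := uint32(0)
	batchSize := uint32(1)
	for {
		if !searchOne(startIndex) {
			break // simplification: real code keeps polling the chain tip
		}
		fmt.Println("fetched block", startIndex)
		startIndex += batchSize // strict-EQ search advances one index per hit
	}
}
```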