[#170] Archive download refactoring

Split DownloadZipped handler on methods. Add handler DownloadTar for downloading tar.gz archives. Make methods more universal for using in both implementations

Signed-off-by: Nikita Zinkevich <n.zinkevich@yadro.com>
This commit is contained in:
Nikita Zinkevich 2024-12-06 12:53:24 +03:00
parent e81f01c2ab
commit ed3c7aca60
Signed by: nzinkevich
GPG key ID: 748EA1D0B2E6420A
6 changed files with 270 additions and 118 deletions

View file

@ -647,6 +647,7 @@ func (a *app) configureRouter(handler *handler.Handler) {
r.OPTIONS("/get_by_attribute/{cid}/{attr_key}/{attr_val:*}", a.addPreflight())
a.log.Info(logs.AddedPathGetByAttributeCidAttrKeyAttrVal)
r.GET("/zip/{cid}/{prefix:*}", a.addMiddlewares(handler.DownloadZipped))
r.GET("/tar/{cid}/{prefix:*}", a.addMiddlewares(handler.DownloadTar))
r.OPTIONS("/zip/{cid}/{prefix:*}", a.addPreflight())
a.log.Info(logs.AddedPathZipCidPrefix)

View file

@ -1,19 +1,20 @@
package handler
import (
"archive/tar"
"archive/zip"
"bufio"
"compress/gzip"
"context"
"fmt"
"io"
"net/http"
"net/url"
"time"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/response"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/utils"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/bearer"
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
@ -36,13 +37,6 @@ func (h *Handler) DownloadByAddressOrBucketName(c *fasthttp.RequestCtx) {
}
}
func (h *Handler) newRequest(ctx *fasthttp.RequestCtx, log *zap.Logger) *request {
return &request{
RequestCtx: ctx,
log: log,
}
}
// DownloadByAttribute handles attribute-based download requests.
func (h *Handler) DownloadByAttribute(c *fasthttp.RequestCtx) {
h.byAttribute(c, h.receiveFile)
@ -64,13 +58,58 @@ func (h *Handler) search(ctx context.Context, cnrID cid.ID, key, val string, op
return h.frostfs.SearchObjects(ctx, prm)
}
func (h *Handler) addObjectToZip(zw *zip.Writer, obj *object.Object) (io.Writer, error) {
// DownloadZipped handles zip by prefix requests.
func (h *Handler) DownloadZipped(c *fasthttp.RequestCtx) {
scid, _ := c.UserValue("cid").(string)
ctx := utils.GetContextFromRequest(c)
log := utils.GetReqLogOrDefault(ctx, h.log)
bktInfo, err := h.getBucketInfo(ctx, scid, log)
if err != nil {
logAndSendBucketError(c, log, err)
return
}
resSearch, err := h.searchObjectsByPrefix(c, log, bktInfo.CID)
if err != nil {
return
}
c.Response.Header.Set(fasthttp.HeaderContentType, "application/zip")
c.Response.Header.Set(fasthttp.HeaderContentDisposition, "attachment; filename=\"archive.zip\"")
c.SetBodyStreamWriter(h.getZipResponseWriter(ctx, log, resSearch, bktInfo))
}
func (h *Handler) getZipResponseWriter(ctx context.Context, log *zap.Logger, resSearch ResObjectSearch, bktInfo *data.BucketInfo) func(w *bufio.Writer) {
return func(w *bufio.Writer) {
defer resSearch.Close()
zipWriter := zip.NewWriter(w)
var bufZip []byte
errIter := resSearch.Iterate(h.putObjectToArchive(ctx, log, bktInfo.CID, &bufZip,
func(obj *object.Object) (io.Writer, error) {
return h.createZipFile(zipWriter, obj)
}),
)
if errIter != nil {
log.Error(logs.IteratingOverSelectedObjectsFailed, zap.Error(errIter))
} else if bufZip == nil {
log.Error(logs.ObjectsNotFound)
}
if err := zipWriter.Close(); err != nil {
log.Error(logs.CloseZipWriter, zap.Error(err))
}
}
}
func (h *Handler) createZipFile(zw *zip.Writer, obj *object.Object) (io.Writer, error) {
method := zip.Store
if h.config.ZipCompression() {
method = zip.Deflate
}
filePath := getZipFilePath(obj)
filePath := getFilePath(obj)
if len(filePath) == 0 || filePath[len(filePath)-1] == '/' {
return nil, fmt.Errorf("invalid filepath '%s'", filePath)
}
@ -82,98 +121,141 @@ func (h *Handler) addObjectToZip(zw *zip.Writer, obj *object.Object) (io.Writer,
})
}
// DownloadZipped handles zip by prefix requests.
func (h *Handler) DownloadZipped(c *fasthttp.RequestCtx) {
// DownloadTar forms tar.gz from objects by prefix.
func (h *Handler) DownloadTar(c *fasthttp.RequestCtx) {
scid, _ := c.UserValue("cid").(string)
prefix, _ := c.UserValue("prefix").(string)
ctx := utils.GetContextFromRequest(c)
log := utils.GetReqLogOrDefault(ctx, h.log)
prefix, err := url.QueryUnescape(prefix)
if err != nil {
log.Error(logs.FailedToUnescapeQuery, zap.String("cid", scid), zap.String("prefix", prefix), zap.Error(err))
response.Error(c, "could not unescape prefix: "+err.Error(), fasthttp.StatusBadRequest)
return
}
log = log.With(zap.String("cid", scid), zap.String("prefix", prefix))
bktInfo, err := h.getBucketInfo(ctx, scid, log)
if err != nil {
logAndSendBucketError(c, log, err)
return
}
resSearch, err := h.search(ctx, bktInfo.CID, object.AttributeFilePath, prefix, object.MatchCommonPrefix)
resSearch, err := h.searchObjectsByPrefix(c, log, bktInfo.CID)
if err != nil {
log.Error(logs.CouldNotSearchForObjects, zap.Error(err))
response.Error(c, "could not search for objects: "+err.Error(), fasthttp.StatusBadRequest)
return
}
c.Response.Header.Set(fasthttp.HeaderContentType, "application/zip")
c.Response.Header.Set(fasthttp.HeaderContentDisposition, "attachment; filename=\"archive.zip\"")
c.Response.SetStatusCode(http.StatusOK)
c.Response.Header.Set(fasthttp.HeaderContentType, "application/x-gzip")
c.Response.Header.Set(fasthttp.HeaderContentDisposition, "attachment; filename=\"archive.tar.gz\"")
c.SetBodyStreamWriter(func(w *bufio.Writer) {
c.SetBodyStreamWriter(h.getTarResponseWriter(ctx, log, resSearch, bktInfo))
}
func (h *Handler) getTarResponseWriter(ctx context.Context, log *zap.Logger, resSearch ResObjectSearch, bktInfo *data.BucketInfo) func(w *bufio.Writer) {
return func(w *bufio.Writer) {
defer resSearch.Close()
zipWriter := zip.NewWriter(w)
var gzipWriter *gzip.Writer
if h.config.ZipCompression() {
gzipWriter, _ = gzip.NewWriterLevel(w, gzip.DefaultCompression)
} else {
gzipWriter, _ = gzip.NewWriterLevel(w, gzip.NoCompression)
}
defer func() {
if err := gzipWriter.Close(); err != nil {
log.Error(logs.CloseGzipWriter, zap.Error(err))
}
}()
tarWriter := tar.NewWriter(gzipWriter)
defer func() {
if err := tarWriter.Close(); err != nil {
log.Error(logs.CloseTarWriter, zap.Error(err))
}
}()
var bufZip []byte
var addr oid.Address
empty := true
called := false
btoken := bearerToken(ctx)
addr.SetContainer(bktInfo.CID)
errIter := resSearch.Iterate(func(id oid.ID) bool {
called = true
if empty {
bufZip = make([]byte, 3<<20) // the same as for upload
}
empty = false
addr.SetObject(id)
if err = h.zipObject(ctx, zipWriter, addr, btoken, bufZip); err != nil {
log.Error(logs.FailedToAddObjectToArchive, zap.String("oid", id.EncodeToString()), zap.Error(err))
}
return false
})
errIter := resSearch.Iterate(h.putObjectToArchive(ctx, log, bktInfo.CID, &bufZip,
func(obj *object.Object) (io.Writer, error) {
return h.createTarFile(tarWriter, obj)
}),
)
if errIter != nil {
log.Error(logs.IteratingOverSelectedObjectsFailed, zap.Error(errIter))
} else if !called {
} else if bufZip == nil {
log.Error(logs.ObjectsNotFound)
}
if err = zipWriter.Close(); err != nil {
log.Error(logs.CloseZipWriter, zap.Error(err))
}
}
func (h *Handler) createTarFile(tw *tar.Writer, obj *object.Object) (io.Writer, error) {
filePath := getFilePath(obj)
if len(filePath) == 0 || filePath[len(filePath)-1] == '/' {
return nil, fmt.Errorf("invalid filepath '%s'", filePath)
}
return tw, tw.WriteHeader(&tar.Header{
Name: filePath,
Mode: 0655,
Size: int64(obj.PayloadSize()),
})
}
func (h *Handler) zipObject(ctx context.Context, zipWriter *zip.Writer, addr oid.Address, btoken *bearer.Token, bufZip []byte) error {
func (h *Handler) putObjectToArchive(ctx context.Context, log *zap.Logger, cnrID cid.ID, bufZip *[]byte, createArchiveHeader func(obj *object.Object) (io.Writer, error)) func(id oid.ID) bool {
return func(id oid.ID) bool {
log := log.With(zap.String("oid", id.EncodeToString()))
if *bufZip == nil {
*bufZip = make([]byte, 3<<20) // the same as for upload
}
prm := PrmObjectGet{
PrmAuth: PrmAuth{
BearerToken: btoken,
BearerToken: bearerToken(ctx),
},
Address: addr,
Address: newAddress(cnrID, id),
}
resGet, err := h.frostfs.GetObject(ctx, prm)
if err != nil {
return fmt.Errorf("get FrostFS object: %v", err)
log.Error(logs.FailedToGetObject, zap.Error(err))
return false
}
objWriter, err := h.addObjectToZip(zipWriter, &resGet.Header)
fileWriter, err := createArchiveHeader(&resGet.Header)
if err != nil {
return fmt.Errorf("zip create header: %v", err)
log.Error(logs.FailedToAddObjectToArchive, zap.Error(err))
return false
}
if err = h.writeToArchive(resGet, fileWriter, *bufZip); err != nil {
log.Error(logs.FailedToAddObjectToArchive, zap.Error(err))
return false
}
return false
}
}
func (h *Handler) searchObjectsByPrefix(c *fasthttp.RequestCtx, log *zap.Logger, cnrID cid.ID) (ResObjectSearch, error) {
scid := cnrID.EncodeToString()
prefix, _ := c.UserValue("prefix").(string)
ctx := utils.GetContextFromRequest(c)
prefix, err := url.QueryUnescape(prefix)
if err != nil {
log.Error(logs.FailedToUnescapeQuery, zap.String("cid", scid), zap.String("prefix", prefix), zap.Error(err))
response.Error(c, "could not unescape prefix: "+err.Error(), fasthttp.StatusBadRequest)
return nil, err
}
log = log.With(zap.String("cid", scid), zap.String("prefix", prefix))
resSearch, err := h.search(ctx, cnrID, object.AttributeFilePath, prefix, object.MatchCommonPrefix)
if err != nil {
log.Error(logs.CouldNotSearchForObjects, zap.Error(err))
response.Error(c, "could not search for objects: "+err.Error(), fasthttp.StatusBadRequest)
return nil, err
}
return resSearch, nil
}
func (h *Handler) writeToArchive(resGet *Object, objWriter io.Writer, bufZip []byte) error {
var err error
if _, err = io.CopyBuffer(objWriter, resGet.Payload, bufZip); err != nil {
return fmt.Errorf("copy object payload to zip file: %v", err)
}
@ -182,14 +264,10 @@ func (h *Handler) zipObject(ctx context.Context, zipWriter *zip.Writer, addr oid
return fmt.Errorf("object body close error: %w", err)
}
if err = zipWriter.Flush(); err != nil {
return fmt.Errorf("flush zip writer: %v", err)
}
return nil
}
func getZipFilePath(obj *object.Object) string {
func getFilePath(obj *object.Object) string {
for _, attr := range obj.Attributes() {
if attr.Key() == object.AttributeFilePath {
return attr.Value()

View file

@ -215,7 +215,7 @@ func (h *Handler) byNativeAddress(c *fasthttp.RequestCtx, f func(context.Context
addr := newAddress(bktInfo.CID, *objID)
f(ctx, *h.newRequest(c, log), addr)
f(ctx, newRequest(c, log), addr)
}
// byS3Path is a wrapper for function (e.g. request.headObject, request.receiveFile) that
@ -257,7 +257,7 @@ func (h *Handler) byS3Path(c *fasthttp.RequestCtx, f func(context.Context, reque
}
addr := newAddress(bktInfo.CID, foundOid.OID)
f(ctx, *h.newRequest(c, log), addr)
f(ctx, newRequest(c, log), addr)
}
// byAttribute is a wrapper similar to byNativeAddress.
@ -319,7 +319,7 @@ func (h *Handler) byAttribute(c *fasthttp.RequestCtx, f func(context.Context, re
addrObj.SetContainer(bktInfo.CID)
addrObj.SetObject(buf[0])
f(ctx, *h.newRequest(c, log), addrObj)
f(ctx, newRequest(c, log), addrObj)
}
// resolveContainer decode container id, if it's not a valid container id

View file

@ -1,13 +1,18 @@
package handler
import (
"archive/tar"
"compress/gzip"
"context"
"encoding/json"
"errors"
"io"
"net/http"
"path/filepath"
"strconv"
"time"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/response"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tokens"
@ -22,6 +27,7 @@ import (
const (
jsonHeader = "application/json; charset=UTF-8"
drainBufSize = 4096
explodeArchiveHeader = "Explode-Archive"
)
type putResponse struct {
@ -44,11 +50,7 @@ func (pr *putResponse) encode(w io.Writer) error {
// Upload handles multipart upload request.
func (h *Handler) Upload(c *fasthttp.RequestCtx) {
var (
file MultipartFile
idObj oid.ID
addr oid.Address
)
var file MultipartFile
scid, _ := c.UserValue("cid").(string)
bodyStream := c.RequestBodyStream()
@ -72,7 +74,7 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
err := file.Close()
log.Debug(
logs.CloseTemporaryMultipartFormFile,
zap.Stringer("address", addr),
zap.Stringer("container", bktInfo.CID),
zap.String("filename", file.FileName()),
zap.Error(err),
)
@ -85,11 +87,51 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
return
}
if header := c.Request.Header.Peek(utils.UserAttributeHeaderPrefix + explodeArchiveHeader); header != nil {
h.explodeGzip(c, log, bktInfo, file)
} else {
h.uploadSingleObject(c, log, bktInfo, file)
}
// Multipart is multipart and thus can contain more than one part which
// we ignore at the moment. Also, when dealing with chunked encoding
// the last zero-length chunk might be left unread (because multipart
// reader only cares about its boundary and doesn't look further) and
// it will be (erroneously) interpreted as the start of the next
// pipelined header. Thus we need to drain the body buffer.
for {
_, err = bodyStream.Read(drainBuf)
if err == io.EOF || errors.Is(err, io.ErrUnexpectedEOF) {
break
}
}
}
func (h *Handler) uploadSingleObject(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, file MultipartFile) {
idObj, err := h.uploadObject(c, log, bktInfo, file.FileName(), file)
if err != nil {
log.Error(logs.FailedToUploadObject, zap.Error(err))
return
}
addr := newAddress(bktInfo.CID, idObj)
// Try to return the response, otherwise, if something went wrong, throw an error.
if err = newPutResponse(addr).encode(c); err != nil {
log.Error(logs.CouldNotEncodeResponse, zap.Error(err))
response.Error(c, "could not encode response", fasthttp.StatusBadRequest)
return
}
c.Response.Header.SetContentType(jsonHeader)
}
func (h *Handler) uploadObject(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, fileName string, file io.Reader) (oid.ID, error) {
ctx := utils.GetContextFromRequest(c)
filtered, err := filterHeaders(log, &c.Request.Header)
if err != nil {
log.Error(logs.CouldNotProcessHeaders, zap.Error(err))
response.Error(c, err.Error(), fasthttp.StatusBadRequest)
return
log.Error(logs.FailedToFilterHeaders, zap.Error(err))
response.Error(c, "could not filter headers", fasthttp.StatusBadRequest)
return oid.ID{}, err
}
now := time.Now()
@ -104,7 +146,7 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
if err = utils.PrepareExpirationHeader(c, h.frostfs, filtered, now); err != nil {
log.Error(logs.CouldNotPrepareExpirationHeader, zap.Error(err))
response.Error(c, "could not prepare expiration header: "+err.Error(), fasthttp.StatusBadRequest)
return
return oid.ID{}, err
}
attributes := make([]object.Attribute, 0, len(filtered))
@ -117,10 +159,10 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
}
// sets FileName attribute if it wasn't set from header
if _, ok := filtered[object.AttributeFileName]; !ok {
filename := object.NewAttribute()
filename.SetKey(object.AttributeFileName)
filename.SetValue(file.FileName())
attributes = append(attributes, *filename)
fileNameAttr := object.NewAttribute()
fileNameAttr.SetKey(object.AttributeFileName)
fileNameAttr.SetValue(fileName)
attributes = append(attributes, *fileNameAttr)
}
// sets Timestamp attribute if it wasn't set from header and enabled by settings
if _, ok := filtered[object.AttributeTimestamp]; !ok && h.config.DefaultTimestamp() {
@ -146,36 +188,51 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
BufferMaxSize: h.config.BufferMaxSizeForPut(),
}
var idObj oid.ID
if idObj, err = h.frostfs.CreateObject(ctx, prm); err != nil {
h.handlePutFrostFSErr(c, err, log)
return
return oid.ID{}, err
}
return idObj, nil
}
addr.SetObject(idObj)
addr.SetContainer(bktInfo.CID)
// Try to return the response, otherwise, if something went wrong, throw an error.
if err = newPutResponse(addr).encode(c); err != nil {
log.Error(logs.CouldNotEncodeResponse, zap.Error(err))
response.Error(c, "could not encode response", fasthttp.StatusBadRequest)
// explodeGzip read files from tar.gz archive and creates objects for each of them.
// Sets FilePath attribute with name from tar.Header
func (h *Handler) explodeGzip(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, file io.Reader) {
gzipReader, err := gzip.NewReader(file)
if err != nil {
log.Error(logs.FailedToCreateReader, zap.Error(err))
response.Error(c, "could not create gzip reader: "+err.Error(), fasthttp.StatusBadRequest)
return
}
// Multipart is multipart and thus can contain more than one part which
// we ignore at the moment. Also, when dealing with chunked encoding
// the last zero-length chunk might be left unread (because multipart
// reader only cares about its boundary and doesn't look further) and
// it will be (erroneously) interpreted as the start of the next
// pipelined header. Thus we need to drain the body buffer.
defer func() {
if err := gzipReader.Close(); err != nil {
log.Error(logs.FailedToCloseReader, zap.Error(err))
}
}()
tarReader := tar.NewReader(gzipReader)
var obj *tar.Header
for {
_, err = bodyStream.Read(drainBuf)
if err == io.EOF || err == io.ErrUnexpectedEOF {
obj, err = tarReader.Next()
if errors.Is(err, io.EOF) {
break
} else if err != nil {
log.Error(logs.FailedToReadFileFromTar, zap.Error(err))
continue
}
if isDir(obj.Name) {
continue
}
c.Request.Header.Set(utils.UserAttributeHeaderPrefix+object.AttributeFilePath, obj.Name)
idObj, err := h.uploadObject(c, log, bktInfo, filepath.Base(obj.Name), tarReader)
if err != nil {
log.Error(logs.FailedToUploadObject, zap.Error(err))
response.Error(c, "could not upload object: "+err.Error(), fasthttp.StatusBadRequest)
}
log.Debug(logs.ObjectUploaded, zap.String("object ID", idObj.EncodeToString()))
}
// Report status code and content type.
c.Response.SetStatusCode(fasthttp.StatusOK)
c.Response.Header.SetContentType(jsonHeader)
}
func (h *Handler) handlePutFrostFSErr(r *fasthttp.RequestCtx, err error, log *zap.Logger) {

View file

@ -22,6 +22,13 @@ type request struct {
log *zap.Logger
}
func newRequest(ctx *fasthttp.RequestCtx, log *zap.Logger) request {
return request{
RequestCtx: ctx,
log: log,
}
}
func (r *request) handleFrostFSErr(err error, start time.Time) {
logFields := []zap.Field{
zap.Stringer("elapsed", time.Since(start)),

View file

@ -11,9 +11,13 @@ const (
ObjectNotFound = "object not found" // Error in ../../downloader/download.go
ReadObjectListFailed = "read object list failed" // Error in ../../downloader/download.go
FailedToAddObjectToArchive = "failed to add object to archive" // Error in ../../downloader/download.go
FailedToFlushWriter = "failed to flush writer" // Error in ../../downloader/download.go
FailedToGetObject = "failed to get object" // Error in ../../downloader/download.go
IteratingOverSelectedObjectsFailed = "iterating over selected objects failed" // Error in ../../downloader/download.go
ObjectsNotFound = "objects not found" // Error in ../../downloader/download.go
CloseZipWriter = "close zip writer" // Error in ../../downloader/download.go
CloseGzipWriter = "close gzip writer" // Error in ../../downloader/download.go
CloseTarWriter = "close tar writer" // Error in ../../downloader/download.go
ServiceIsRunning = "service is running" // Info in ../../metrics/service.go
ServiceCouldntStartOnConfiguredPort = "service couldn't start on configured port" // Warn in ../../metrics/service.go
ServiceHasntStartedSinceItsDisabled = "service hasn't started since it's disabled" // Info in ../../metrics/service.go
@ -24,9 +28,14 @@ const (
IgnorePartEmptyFilename = "ignore part, empty filename" // Debug in ../../uploader/upload.go
CloseTemporaryMultipartFormFile = "close temporary multipart/form file" // Debug in ../../uploader/upload.go
CouldNotReceiveMultipartForm = "could not receive multipart/form" // Error in ../../uploader/upload.go
CouldNotProcessHeaders = "could not process headers" // Error in ../../uploader/upload.go
CouldNotParseClientTime = "could not parse client time" // Warn in ../../uploader/upload.go
CouldNotPrepareExpirationHeader = "could not prepare expiration header" // Error in ../../uploader/upload.go
FailedToCloseReader = "failed to close reader" // Error in ../../uploader/upload.go
FailedToCreateReader = "failed to create reader" // Error in ../../uploader/upload.go
FailedToReadFileFromTar = "failed to read file from tar" // Error in ../../uploader/upload.go
FailedToFilterHeaders = "failed to filter headers" // Error in ../../uploader/upload.go
FailedToUploadObject = "failed to upload object" // Error in ../../uploader/upload.go
ObjectUploaded = "object uploaded" // Debug in ../../uploader/upload.go
CouldNotEncodeResponse = "could not encode response" // Error in ../../uploader/upload.go
CouldNotStoreFileInFrostfs = "could not store file in frostfs" // Error in ../../uploader/upload.go
AddAttributeToResultObject = "add attribute to result object" // Debug in ../../uploader/filter.go