From b17a1a8dd56b58f15b3e779b7815981924494adf Mon Sep 17 00:00:00 2001 From: Nikita Zinkevich Date: Fri, 13 Dec 2024 16:00:31 +0300 Subject: [PATCH] [#170] Archive download refactoring Split DownloadZipped handler on methods. Add handler DownloadTar for downloading tar.gz archives. Make methods more universal for using in both implementations Signed-off-by: Nikita Zinkevich --- cmd/http-gw/app.go | 1 + internal/handler/download.go | 226 +++++++++++++++++++++++------------ internal/handler/handler.go | 6 +- internal/handler/utils.go | 7 ++ internal/logs/logs.go | 4 +- 5 files changed, 166 insertions(+), 78 deletions(-) diff --git a/cmd/http-gw/app.go b/cmd/http-gw/app.go index 53c001c..290dfaa 100644 --- a/cmd/http-gw/app.go +++ b/cmd/http-gw/app.go @@ -647,6 +647,7 @@ func (a *app) configureRouter(handler *handler.Handler) { r.OPTIONS("/get_by_attribute/{cid}/{attr_key}/{attr_val:*}", a.addPreflight()) a.log.Info(logs.AddedPathGetByAttributeCidAttrKeyAttrVal) r.GET("/zip/{cid}/{prefix:*}", a.addMiddlewares(handler.DownloadZipped)) + r.GET("/tar/{cid}/{prefix:*}", a.addMiddlewares(handler.DownloadTar)) r.OPTIONS("/zip/{cid}/{prefix:*}", a.addPreflight()) a.log.Info(logs.AddedPathZipCidPrefix) diff --git a/internal/handler/download.go b/internal/handler/download.go index cd4e55a..2c70633 100644 --- a/internal/handler/download.go +++ b/internal/handler/download.go @@ -1,19 +1,20 @@ package handler import ( + "archive/tar" "archive/zip" "bufio" + "compress/gzip" "context" "fmt" "io" - "net/http" "net/url" "time" + "git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/response" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/utils" - "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/bearer" cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" @@ -36,13 +37,6 @@ func (h *Handler) DownloadByAddressOrBucketName(c *fasthttp.RequestCtx) { } } -func (h *Handler) newRequest(ctx *fasthttp.RequestCtx, log *zap.Logger) *request { - return &request{ - RequestCtx: ctx, - log: log, - } -} - // DownloadByAttribute handles attribute-based download requests. func (h *Handler) DownloadByAttribute(c *fasthttp.RequestCtx) { h.byAttribute(c, h.receiveFile) @@ -64,13 +58,58 @@ func (h *Handler) search(ctx context.Context, cnrID cid.ID, key, val string, op return h.frostfs.SearchObjects(ctx, prm) } -func (h *Handler) addObjectToZip(zw *zip.Writer, obj *object.Object) (io.Writer, error) { +// DownloadZipped handles zip by prefix requests. +func (h *Handler) DownloadZipped(c *fasthttp.RequestCtx) { + scid, _ := c.UserValue("cid").(string) + + ctx := utils.GetContextFromRequest(c) + log := utils.GetReqLogOrDefault(ctx, h.log) + bktInfo, err := h.getBucketInfo(ctx, scid, log) + if err != nil { + logAndSendBucketError(c, log, err) + return + } + resSearch, err := h.searchObjectsByPrefix(c, log, bktInfo.CID) + if err != nil { + return + } + + c.Response.Header.Set(fasthttp.HeaderContentType, "application/zip") + c.Response.Header.Set(fasthttp.HeaderContentDisposition, "attachment; filename=\"archive.zip\"") + + c.SetBodyStreamWriter(h.getZipResponseWriter(ctx, log, resSearch, bktInfo)) +} + +func (h *Handler) getZipResponseWriter(ctx context.Context, log *zap.Logger, resSearch ResObjectSearch, bktInfo *data.BucketInfo) func(w *bufio.Writer) { + return func(w *bufio.Writer) { + defer resSearch.Close() + + zipWriter := zip.NewWriter(w) + var bufZip []byte + + errIter := resSearch.Iterate(h.putObjectToArchive(ctx, log, bktInfo.CID, &bufZip, + func(obj *object.Object) (io.Writer, error) { + return h.createZipFile(zipWriter, obj) + }), + ) + if errIter != nil { + log.Error(logs.IteratingOverSelectedObjectsFailed, zap.Error(errIter)) + } else if bufZip == nil { + log.Error(logs.ObjectsNotFound) + } + if err := zipWriter.Close(); err != nil { + log.Error(logs.CloseZipWriter, zap.Error(err)) + } + } +} + +func (h *Handler) createZipFile(zw *zip.Writer, obj *object.Object) (io.Writer, error) { method := zip.Store if h.config.ZipCompression() { method = zip.Deflate } - filePath := getZipFilePath(obj) + filePath := getFilePath(obj) if len(filePath) == 0 || filePath[len(filePath)-1] == '/' { return nil, fmt.Errorf("invalid filepath '%s'", filePath) } @@ -82,98 +121,141 @@ func (h *Handler) addObjectToZip(zw *zip.Writer, obj *object.Object) (io.Writer, }) } -// DownloadZipped handles zip by prefix requests. -func (h *Handler) DownloadZipped(c *fasthttp.RequestCtx) { +// DownloadTar forms tar.gz from objects by prefix. +func (h *Handler) DownloadTar(c *fasthttp.RequestCtx) { scid, _ := c.UserValue("cid").(string) - prefix, _ := c.UserValue("prefix").(string) ctx := utils.GetContextFromRequest(c) log := utils.GetReqLogOrDefault(ctx, h.log) - - prefix, err := url.QueryUnescape(prefix) - if err != nil { - log.Error(logs.FailedToUnescapeQuery, zap.String("cid", scid), zap.String("prefix", prefix), zap.Error(err)) - response.Error(c, "could not unescape prefix: "+err.Error(), fasthttp.StatusBadRequest) - return - } - - log = log.With(zap.String("cid", scid), zap.String("prefix", prefix)) - bktInfo, err := h.getBucketInfo(ctx, scid, log) if err != nil { logAndSendBucketError(c, log, err) return } - - resSearch, err := h.search(ctx, bktInfo.CID, object.AttributeFilePath, prefix, object.MatchCommonPrefix) + resSearch, err := h.searchObjectsByPrefix(c, log, bktInfo.CID) if err != nil { - log.Error(logs.CouldNotSearchForObjects, zap.Error(err)) - response.Error(c, "could not search for objects: "+err.Error(), fasthttp.StatusBadRequest) return } - c.Response.Header.Set(fasthttp.HeaderContentType, "application/zip") - c.Response.Header.Set(fasthttp.HeaderContentDisposition, "attachment; filename=\"archive.zip\"") - c.Response.SetStatusCode(http.StatusOK) + c.Response.Header.Set(fasthttp.HeaderContentType, "application/x-gzip") + c.Response.Header.Set(fasthttp.HeaderContentDisposition, "attachment; filename=\"archive.tar.gz\"") - c.SetBodyStreamWriter(func(w *bufio.Writer) { + c.SetBodyStreamWriter(h.getTarResponseWriter(ctx, log, resSearch, bktInfo)) +} + +func (h *Handler) getTarResponseWriter(ctx context.Context, log *zap.Logger, resSearch ResObjectSearch, bktInfo *data.BucketInfo) func(w *bufio.Writer) { + return func(w *bufio.Writer) { defer resSearch.Close() - zipWriter := zip.NewWriter(w) + var gzipWriter *gzip.Writer + if h.config.ZipCompression() { + gzipWriter, _ = gzip.NewWriterLevel(w, gzip.DefaultCompression) + } else { + gzipWriter, _ = gzip.NewWriterLevel(w, gzip.NoCompression) + } + defer func() { + if err := gzipWriter.Close(); err != nil { + log.Error(logs.CloseGzipWriter, zap.Error(err)) + } + }() + + tarWriter := tar.NewWriter(gzipWriter) + defer func() { + if err := tarWriter.Close(); err != nil { + log.Error(logs.CloseTarWriter, zap.Error(err)) + } + }() var bufZip []byte - var addr oid.Address - empty := true - called := false - btoken := bearerToken(ctx) - addr.SetContainer(bktInfo.CID) - - errIter := resSearch.Iterate(func(id oid.ID) bool { - called = true - - if empty { - bufZip = make([]byte, 3<<20) // the same as for upload - } - empty = false - - addr.SetObject(id) - if err = h.zipObject(ctx, zipWriter, addr, btoken, bufZip); err != nil { - log.Error(logs.FailedToAddObjectToArchive, zap.String("oid", id.EncodeToString()), zap.Error(err)) - } - - return false - }) + errIter := resSearch.Iterate(h.putObjectToArchive(ctx, log, bktInfo.CID, &bufZip, + func(obj *object.Object) (io.Writer, error) { + return h.createTarFile(tarWriter, obj) + }), + ) if errIter != nil { log.Error(logs.IteratingOverSelectedObjectsFailed, zap.Error(errIter)) - } else if !called { + } else if bufZip == nil { log.Error(logs.ObjectsNotFound) } + } +} - if err = zipWriter.Close(); err != nil { - log.Error(logs.CloseZipWriter, zap.Error(err)) - } +func (h *Handler) createTarFile(tw *tar.Writer, obj *object.Object) (io.Writer, error) { + filePath := getFilePath(obj) + if len(filePath) == 0 || filePath[len(filePath)-1] == '/' { + return nil, fmt.Errorf("invalid filepath '%s'", filePath) + } + + return tw, tw.WriteHeader(&tar.Header{ + Name: filePath, + Mode: 0655, + Size: int64(obj.PayloadSize()), }) } -func (h *Handler) zipObject(ctx context.Context, zipWriter *zip.Writer, addr oid.Address, btoken *bearer.Token, bufZip []byte) error { - prm := PrmObjectGet{ - PrmAuth: PrmAuth{ - BearerToken: btoken, - }, - Address: addr, - } +func (h *Handler) putObjectToArchive(ctx context.Context, log *zap.Logger, cnrID cid.ID, bufZip *[]byte, createArchiveHeader func(obj *object.Object) (io.Writer, error)) func(id oid.ID) bool { + return func(id oid.ID) bool { + log := log.With(zap.String("oid", id.EncodeToString())) - resGet, err := h.frostfs.GetObject(ctx, prm) + if *bufZip == nil { + *bufZip = make([]byte, 3<<20) // the same as for upload + } + + prm := PrmObjectGet{ + PrmAuth: PrmAuth{ + BearerToken: bearerToken(ctx), + }, + Address: newAddress(cnrID, id), + } + + resGet, err := h.frostfs.GetObject(ctx, prm) + if err != nil { + log.Error(logs.FailedToGetObject, zap.Error(err)) + return false + } + + fileWriter, err := createArchiveHeader(&resGet.Header) + if err != nil { + log.Error(logs.FailedToAddObjectToArchive, zap.Error(err)) + return false + } + + if err = h.writeToArchive(resGet, fileWriter, *bufZip); err != nil { + log.Error(logs.FailedToAddObjectToArchive, zap.Error(err)) + return false + } + + return false + } +} + +func (h *Handler) searchObjectsByPrefix(c *fasthttp.RequestCtx, log *zap.Logger, cnrID cid.ID) (ResObjectSearch, error) { + scid := cnrID.EncodeToString() + prefix, _ := c.UserValue("prefix").(string) + + ctx := utils.GetContextFromRequest(c) + + prefix, err := url.QueryUnescape(prefix) if err != nil { - return fmt.Errorf("get FrostFS object: %v", err) + log.Error(logs.FailedToUnescapeQuery, zap.String("cid", scid), zap.String("prefix", prefix), zap.Error(err)) + response.Error(c, "could not unescape prefix: "+err.Error(), fasthttp.StatusBadRequest) + return nil, err } - objWriter, err := h.addObjectToZip(zipWriter, &resGet.Header) + log = log.With(zap.String("cid", scid), zap.String("prefix", prefix)) + + resSearch, err := h.search(ctx, cnrID, object.AttributeFilePath, prefix, object.MatchCommonPrefix) if err != nil { - return fmt.Errorf("zip create header: %v", err) + log.Error(logs.CouldNotSearchForObjects, zap.Error(err)) + response.Error(c, "could not search for objects: "+err.Error(), fasthttp.StatusBadRequest) + return nil, err } + return resSearch, nil +} +func (h *Handler) writeToArchive(resGet *Object, objWriter io.Writer, bufZip []byte) error { + var err error if _, err = io.CopyBuffer(objWriter, resGet.Payload, bufZip); err != nil { return fmt.Errorf("copy object payload to zip file: %v", err) } @@ -182,14 +264,10 @@ func (h *Handler) zipObject(ctx context.Context, zipWriter *zip.Writer, addr oid return fmt.Errorf("object body close error: %w", err) } - if err = zipWriter.Flush(); err != nil { - return fmt.Errorf("flush zip writer: %v", err) - } - return nil } -func getZipFilePath(obj *object.Object) string { +func getFilePath(obj *object.Object) string { for _, attr := range obj.Attributes() { if attr.Key() == object.AttributeFilePath { return attr.Value() diff --git a/internal/handler/handler.go b/internal/handler/handler.go index 9ed7f99..fa9b973 100644 --- a/internal/handler/handler.go +++ b/internal/handler/handler.go @@ -215,7 +215,7 @@ func (h *Handler) byNativeAddress(c *fasthttp.RequestCtx, f func(context.Context addr := newAddress(bktInfo.CID, *objID) - f(ctx, *h.newRequest(c, log), addr) + f(ctx, newRequest(c, log), addr) } // byS3Path is a wrapper for function (e.g. request.headObject, request.receiveFile) that @@ -257,7 +257,7 @@ func (h *Handler) byS3Path(c *fasthttp.RequestCtx, f func(context.Context, reque } addr := newAddress(bktInfo.CID, foundOid.OID) - f(ctx, *h.newRequest(c, log), addr) + f(ctx, newRequest(c, log), addr) } // byAttribute is a wrapper similar to byNativeAddress. @@ -319,7 +319,7 @@ func (h *Handler) byAttribute(c *fasthttp.RequestCtx, f func(context.Context, re addrObj.SetContainer(bktInfo.CID) addrObj.SetObject(buf[0]) - f(ctx, *h.newRequest(c, log), addrObj) + f(ctx, newRequest(c, log), addrObj) } // resolveContainer decode container id, if it's not a valid container id diff --git a/internal/handler/utils.go b/internal/handler/utils.go index b537d64..617d756 100644 --- a/internal/handler/utils.go +++ b/internal/handler/utils.go @@ -22,6 +22,13 @@ type request struct { log *zap.Logger } +func newRequest(ctx *fasthttp.RequestCtx, log *zap.Logger) request { + return request{ + RequestCtx: ctx, + log: log, + } +} + func (r *request) handleFrostFSErr(err error, start time.Time) { logFields := []zap.Field{ zap.Stringer("elapsed", time.Since(start)), diff --git a/internal/logs/logs.go b/internal/logs/logs.go index 4dfa21f..3dc804b 100644 --- a/internal/logs/logs.go +++ b/internal/logs/logs.go @@ -11,9 +11,12 @@ const ( ObjectNotFound = "object not found" // Error in ../../downloader/download.go ReadObjectListFailed = "read object list failed" // Error in ../../downloader/download.go FailedToAddObjectToArchive = "failed to add object to archive" // Error in ../../downloader/download.go + FailedToGetObject = "failed to get object" // Error in ../../downloader/download.go IteratingOverSelectedObjectsFailed = "iterating over selected objects failed" // Error in ../../downloader/download.go ObjectsNotFound = "objects not found" // Error in ../../downloader/download.go CloseZipWriter = "close zip writer" // Error in ../../downloader/download.go + CloseGzipWriter = "close gzip writer" // Error in ../../downloader/download.go + CloseTarWriter = "close tar writer" // Error in ../../downloader/download.go ServiceIsRunning = "service is running" // Info in ../../metrics/service.go ServiceCouldntStartOnConfiguredPort = "service couldn't start on configured port" // Warn in ../../metrics/service.go ServiceHasntStartedSinceItsDisabled = "service hasn't started since it's disabled" // Info in ../../metrics/service.go @@ -24,7 +27,6 @@ const ( IgnorePartEmptyFilename = "ignore part, empty filename" // Debug in ../../uploader/upload.go CloseTemporaryMultipartFormFile = "close temporary multipart/form file" // Debug in ../../uploader/upload.go CouldNotReceiveMultipartForm = "could not receive multipart/form" // Error in ../../uploader/upload.go - CouldNotProcessHeaders = "could not process headers" // Error in ../../uploader/upload.go CouldNotParseClientTime = "could not parse client time" // Warn in ../../uploader/upload.go CouldNotPrepareExpirationHeader = "could not prepare expiration header" // Error in ../../uploader/upload.go CouldNotEncodeResponse = "could not encode response" // Error in ../../uploader/upload.go