From 510727512ce81c307633e6f70126822ce1fbf8e8 Mon Sep 17 00:00:00 2001 From: Nikita Zinkevich Date: Fri, 13 Dec 2024 16:00:31 +0300 Subject: [PATCH] [#170] Support tar.gz downloading Split DownloadZipped handler on methods. Add handler DownloadTar for downloading tar.gz archives. Make methods more universal for using in both implementations Signed-off-by: Nikita Zinkevich --- cmd/http-gw/app.go | 2 + internal/handler/download.go | 219 ++++++++++++++++++++++++----------- internal/handler/handler.go | 2 +- internal/handler/utils.go | 7 ++ internal/logs/logs.go | 4 +- 5 files changed, 165 insertions(+), 69 deletions(-) diff --git a/cmd/http-gw/app.go b/cmd/http-gw/app.go index 23a752a..adbc8ae 100644 --- a/cmd/http-gw/app.go +++ b/cmd/http-gw/app.go @@ -657,6 +657,8 @@ func (a *app) configureRouter(handler *handler.Handler) { a.log.Info(logs.AddedPathGetByAttributeCidAttrKeyAttrVal) r.GET("/zip/{cid}/{prefix:*}", a.addMiddlewares(handler.DownloadZipped)) r.OPTIONS("/zip/{cid}/{prefix:*}", a.addPreflight()) + r.GET("/tar/{cid}/{prefix:*}", a.addMiddlewares(handler.DownloadTar)) + r.OPTIONS("/tar/{cid}/{prefix:*}", a.addPreflight()) a.log.Info(logs.AddedPathZipCidPrefix) a.webServer.Handler = r.Handler diff --git a/internal/handler/download.go b/internal/handler/download.go index de27fa3..aa2f5ec 100644 --- a/internal/handler/download.go +++ b/internal/handler/download.go @@ -1,21 +1,22 @@ package handler import ( + "archive/tar" "archive/zip" "bufio" + "compress/gzip" "context" "errors" "fmt" "io" - "net/http" "net/url" "time" + "git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/layer" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/response" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/utils" - "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/bearer" cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" @@ -91,13 +92,62 @@ func (h *Handler) search(ctx context.Context, cnrID cid.ID, key, val string, op return h.frostfs.SearchObjects(ctx, prm) } -func (h *Handler) addObjectToZip(zw *zip.Writer, obj *object.Object) (io.Writer, error) { +// DownloadZipped handles zip by prefix requests. +func (h *Handler) DownloadZipped(c *fasthttp.RequestCtx) { + scid, _ := c.UserValue("cid").(string) + + ctx := utils.GetContextFromRequest(c) + log := utils.GetReqLogOrDefault(ctx, h.log) + bktInfo, err := h.getBucketInfo(ctx, scid, log) + if err != nil { + logAndSendBucketError(c, log, err) + return + } + resSearch, err := h.searchObjectsByPrefix(c, log, bktInfo.CID) + if err != nil { + return + } + + c.Response.Header.Set(fasthttp.HeaderContentType, "application/zip") + c.Response.Header.Set(fasthttp.HeaderContentDisposition, "attachment; filename=\"archive.zip\"") + + c.SetBodyStreamWriter(h.getZipResponseWriter(ctx, log, resSearch, bktInfo)) +} + +func (h *Handler) getZipResponseWriter(ctx context.Context, log *zap.Logger, resSearch ResObjectSearch, bktInfo *data.BucketInfo) func(w *bufio.Writer) { + return func(w *bufio.Writer) { + defer resSearch.Close() + + bufZip := make([]byte, 3<<20) + zipWriter := zip.NewWriter(w) + var objectsWritten int + + errIter := resSearch.Iterate(h.putObjectToArchive(ctx, log, bktInfo.CID, bufZip, + func(obj *object.Object) (io.Writer, error) { + objectsWritten++ + return h.createZipFile(zipWriter, obj) + }), + ) + if errIter != nil { + log.Error(logs.IteratingOverSelectedObjectsFailed, zap.Error(errIter)) + return + } else if objectsWritten == 0 { + log.Warn(logs.ObjectsNotFound) + return + } + if err := zipWriter.Close(); err != nil { + log.Error(logs.CloseZipWriter, zap.Error(err)) + } + } +} + +func (h *Handler) createZipFile(zw *zip.Writer, obj *object.Object) (io.Writer, error) { method := zip.Store if h.config.ZipCompression() { method = zip.Deflate } - filePath := getZipFilePath(obj) + filePath := getFilePath(obj) if len(filePath) == 0 || filePath[len(filePath)-1] == '/' { return nil, fmt.Errorf("invalid filepath '%s'", filePath) } @@ -109,98 +159,137 @@ func (h *Handler) addObjectToZip(zw *zip.Writer, obj *object.Object) (io.Writer, }) } -// DownloadZipped handles zip by prefix requests. -func (h *Handler) DownloadZipped(c *fasthttp.RequestCtx) { +// DownloadTar forms tar.gz from objects by prefix. +func (h *Handler) DownloadTar(c *fasthttp.RequestCtx) { scid, _ := c.UserValue("cid").(string) - prefix, _ := c.UserValue("prefix").(string) ctx := utils.GetContextFromRequest(c) log := utils.GetReqLogOrDefault(ctx, h.log) - - prefix, err := url.QueryUnescape(prefix) - if err != nil { - log.Error(logs.FailedToUnescapeQuery, zap.String("cid", scid), zap.String("prefix", prefix), zap.Error(err)) - response.Error(c, "could not unescape prefix: "+err.Error(), fasthttp.StatusBadRequest) - return - } - - log = log.With(zap.String("cid", scid), zap.String("prefix", prefix)) - bktInfo, err := h.getBucketInfo(ctx, scid, log) if err != nil { logAndSendBucketError(c, log, err) return } - - resSearch, err := h.search(ctx, bktInfo.CID, object.AttributeFilePath, prefix, object.MatchCommonPrefix) + resSearch, err := h.searchObjectsByPrefix(c, log, bktInfo.CID) if err != nil { - log.Error(logs.CouldNotSearchForObjects, zap.Error(err)) - response.Error(c, "could not search for objects: "+err.Error(), fasthttp.StatusBadRequest) return } - c.Response.Header.Set(fasthttp.HeaderContentType, "application/zip") - c.Response.Header.Set(fasthttp.HeaderContentDisposition, "attachment; filename=\"archive.zip\"") - c.Response.SetStatusCode(http.StatusOK) + c.Response.Header.Set(fasthttp.HeaderContentType, "application/gzip") + c.Response.Header.Set(fasthttp.HeaderContentDisposition, "attachment; filename=\"archive.tar.gz\"") - c.SetBodyStreamWriter(func(w *bufio.Writer) { + c.SetBodyStreamWriter(h.getTarResponseWriter(ctx, log, resSearch, bktInfo)) +} + +func (h *Handler) getTarResponseWriter(ctx context.Context, log *zap.Logger, resSearch ResObjectSearch, bktInfo *data.BucketInfo) func(w *bufio.Writer) { + return func(w *bufio.Writer) { defer resSearch.Close() - zipWriter := zip.NewWriter(w) + compressionLevel := gzip.NoCompression + if h.config.ZipCompression() { + compressionLevel = gzip.DefaultCompression + } - var bufZip []byte - var addr oid.Address + gzipWriter, _ := gzip.NewWriterLevel(w, compressionLevel) + tarWriter := tar.NewWriter(gzipWriter) - empty := true - called := false - btoken := bearerToken(ctx) - addr.SetContainer(bktInfo.CID) - - errIter := resSearch.Iterate(func(id oid.ID) bool { - called = true - - if empty { - bufZip = make([]byte, 3<<20) // the same as for upload + defer func() { + if err := gzipWriter.Close(); err != nil { + log.Error(logs.CloseGzipWriter, zap.Error(err)) } - empty = false - - addr.SetObject(id) - if err = h.zipObject(ctx, zipWriter, addr, btoken, bufZip); err != nil { - log.Error(logs.FailedToAddObjectToArchive, zap.String("oid", id.EncodeToString()), zap.Error(err)) + if err := tarWriter.Close(); err != nil { + log.Error(logs.CloseTarWriter, zap.Error(err)) } + }() - return false - }) + var objectsWritten int + bufZip := make([]byte, 3<<20) // the same as for upload + + errIter := resSearch.Iterate(h.putObjectToArchive(ctx, log, bktInfo.CID, bufZip, + func(obj *object.Object) (io.Writer, error) { + objectsWritten++ + return h.createTarFile(tarWriter, obj) + }), + ) if errIter != nil { log.Error(logs.IteratingOverSelectedObjectsFailed, zap.Error(errIter)) - } else if !called { - log.Error(logs.ObjectsNotFound) + } else if objectsWritten == 0 { + log.Warn(logs.ObjectsNotFound) } + } +} - if err = zipWriter.Close(); err != nil { - log.Error(logs.CloseZipWriter, zap.Error(err)) - } +func (h *Handler) createTarFile(tw *tar.Writer, obj *object.Object) (io.Writer, error) { + filePath := getFilePath(obj) + if len(filePath) == 0 || filePath[len(filePath)-1] == '/' { + return nil, fmt.Errorf("invalid filepath '%s'", filePath) + } + + return tw, tw.WriteHeader(&tar.Header{ + Name: filePath, + Mode: 0655, + Size: int64(obj.PayloadSize()), }) } -func (h *Handler) zipObject(ctx context.Context, zipWriter *zip.Writer, addr oid.Address, btoken *bearer.Token, bufZip []byte) error { - prm := PrmObjectGet{ - PrmAuth: PrmAuth{ - BearerToken: btoken, - }, - Address: addr, - } +func (h *Handler) putObjectToArchive(ctx context.Context, log *zap.Logger, cnrID cid.ID, bufZip []byte, createArchiveHeader func(obj *object.Object) (io.Writer, error)) func(id oid.ID) bool { + return func(id oid.ID) bool { + log = log.With(zap.String("oid", id.EncodeToString())) - resGet, err := h.frostfs.GetObject(ctx, prm) + prm := PrmObjectGet{ + PrmAuth: PrmAuth{ + BearerToken: bearerToken(ctx), + }, + Address: newAddress(cnrID, id), + } + + resGet, err := h.frostfs.GetObject(ctx, prm) + if err != nil { + log.Error(logs.FailedToGetObject, zap.Error(err)) + return false + } + + fileWriter, err := createArchiveHeader(&resGet.Header) + if err != nil { + log.Error(logs.FailedToAddObjectToArchive, zap.Error(err)) + return false + } + + if err = writeToArchive(resGet, fileWriter, bufZip); err != nil { + log.Error(logs.FailedToAddObjectToArchive, zap.Error(err)) + return false + } + + return false + } +} + +func (h *Handler) searchObjectsByPrefix(c *fasthttp.RequestCtx, log *zap.Logger, cnrID cid.ID) (ResObjectSearch, error) { + scid := cnrID.EncodeToString() + prefix, _ := c.UserValue("prefix").(string) + + ctx := utils.GetContextFromRequest(c) + + prefix, err := url.QueryUnescape(prefix) if err != nil { - return fmt.Errorf("get FrostFS object: %v", err) + log.Error(logs.FailedToUnescapeQuery, zap.String("cid", scid), zap.String("prefix", prefix), zap.Error(err)) + response.Error(c, "could not unescape prefix: "+err.Error(), fasthttp.StatusBadRequest) + return nil, err } - objWriter, err := h.addObjectToZip(zipWriter, &resGet.Header) + log = log.With(zap.String("cid", scid), zap.String("prefix", prefix)) + + resSearch, err := h.search(ctx, cnrID, object.AttributeFilePath, prefix, object.MatchCommonPrefix) if err != nil { - return fmt.Errorf("zip create header: %v", err) + log.Error(logs.CouldNotSearchForObjects, zap.Error(err)) + response.Error(c, "could not search for objects: "+err.Error(), fasthttp.StatusBadRequest) + return nil, err } + return resSearch, nil +} +func writeToArchive(resGet *Object, objWriter io.Writer, bufZip []byte) error { + var err error if _, err = io.CopyBuffer(objWriter, resGet.Payload, bufZip); err != nil { return fmt.Errorf("copy object payload to zip file: %v", err) } @@ -208,15 +297,11 @@ func (h *Handler) zipObject(ctx context.Context, zipWriter *zip.Writer, addr oid if err = resGet.Payload.Close(); err != nil { return fmt.Errorf("object body close error: %w", err) } - - if err = zipWriter.Flush(); err != nil { - return fmt.Errorf("flush zip writer: %v", err) - } - +// TODO: проверить вовзрат кода и zip.Flush и префикс return nil } -func getZipFilePath(obj *object.Object) string { +func getFilePath(obj *object.Object) string { for _, attr := range obj.Attributes() { if attr.Key() == object.AttributeFilePath { return attr.Value() diff --git a/internal/handler/handler.go b/internal/handler/handler.go index 3805c2d..653c02d 100644 --- a/internal/handler/handler.go +++ b/internal/handler/handler.go @@ -264,7 +264,7 @@ func (h *Handler) byAttribute(c *fasthttp.RequestCtx, handler func(context.Conte addr.SetContainer(bktInfo.CID) addr.SetObject(objID) - handler(ctx, h.newRequest(c, log), addr) + handler(ctx, newRequest(c, log), addr) } func (h *Handler) findObjectByAttribute(ctx context.Context, log *zap.Logger, cnrID cid.ID, attrKey, attrVal string) (oid.ID, error) { diff --git a/internal/handler/utils.go b/internal/handler/utils.go index d09ed23..fc41a1c 100644 --- a/internal/handler/utils.go +++ b/internal/handler/utils.go @@ -22,6 +22,13 @@ type request struct { log *zap.Logger } +func newRequest(ctx *fasthttp.RequestCtx, log *zap.Logger) request { + return request{ + RequestCtx: ctx, + log: log, + } +} + func (r *request) handleFrostFSErr(err error, start time.Time) { logFields := []zap.Field{ zap.Stringer("elapsed", time.Since(start)), diff --git a/internal/logs/logs.go b/internal/logs/logs.go index 7a04064..48d951c 100644 --- a/internal/logs/logs.go +++ b/internal/logs/logs.go @@ -11,9 +11,12 @@ const ( ObjectNotFound = "object not found" // Error in ../../downloader/download.go ReadObjectListFailed = "read object list failed" // Error in ../../downloader/download.go FailedToAddObjectToArchive = "failed to add object to archive" // Error in ../../downloader/download.go + FailedToGetObject = "failed to get object" // Error in ../../downloader/download.go IteratingOverSelectedObjectsFailed = "iterating over selected objects failed" // Error in ../../downloader/download.go ObjectsNotFound = "objects not found" // Error in ../../downloader/download.go CloseZipWriter = "close zip writer" // Error in ../../downloader/download.go + CloseGzipWriter = "close gzip writer" // Error in ../../downloader/download.go + CloseTarWriter = "close tar writer" // Error in ../../downloader/download.go ServiceIsRunning = "service is running" // Info in ../../metrics/service.go ServiceCouldntStartOnConfiguredPort = "service couldn't start on configured port" // Warn in ../../metrics/service.go ServiceHasntStartedSinceItsDisabled = "service hasn't started since it's disabled" // Info in ../../metrics/service.go @@ -24,7 +27,6 @@ const ( IgnorePartEmptyFilename = "ignore part, empty filename" // Debug in ../../uploader/upload.go CloseTemporaryMultipartFormFile = "close temporary multipart/form file" // Debug in ../../uploader/upload.go CouldNotReceiveMultipartForm = "could not receive multipart/form" // Error in ../../uploader/upload.go - CouldNotProcessHeaders = "could not process headers" // Error in ../../uploader/upload.go CouldNotParseClientTime = "could not parse client time" // Warn in ../../uploader/upload.go CouldNotPrepareExpirationHeader = "could not prepare expiration header" // Error in ../../uploader/upload.go CouldNotEncodeResponse = "could not encode response" // Error in ../../uploader/upload.go