From 69ad9775772d882a878e7391387b8b1c57eb42ba Mon Sep 17 00:00:00 2001
From: Nikita Zinkevich
Date: Fri, 6 Dec 2024 10:48:51 +0300
Subject: [PATCH] [#170] Support tar.gz exploding

During upload, if the "X-Attribute-Explode-Archive" header is set, the
gate reads the uploaded tar.gz archive and creates an object for each
file it contains. Each object gets a FilePath attribute calculated
relative to the archive root.

Signed-off-by: Nikita Zinkevich
---
 internal/handler/download.go    |   2 +-
 internal/handler/filter.go      |   2 +-
 internal/handler/handler.go     |   6 +-
 internal/handler/upload.go      | 139 ++++++++++++++++++++++----------
 internal/handler/upload_test.go |   1 +
 internal/logs/logs.go           |   6 ++
 6 files changed, 111 insertions(+), 45 deletions(-)
 create mode 100644 internal/handler/upload_test.go

diff --git a/internal/handler/download.go b/internal/handler/download.go
index cd4e55a..52251ae 100644
--- a/internal/handler/download.go
+++ b/internal/handler/download.go
@@ -36,7 +36,7 @@ func (h *Handler) DownloadByAddressOrBucketName(c *fasthttp.RequestCtx) {
 	}
 }
 
-func (h *Handler) newRequest(ctx *fasthttp.RequestCtx, log *zap.Logger) *request {
+func newRequest(ctx *fasthttp.RequestCtx, log *zap.Logger) *request {
 	return &request{
 		RequestCtx: ctx,
 		log:        log,
diff --git a/internal/handler/filter.go b/internal/handler/filter.go
index 745718a..9d50d81 100644
--- a/internal/handler/filter.go
+++ b/internal/handler/filter.go
@@ -39,7 +39,7 @@ func filterHeaders(l *zap.Logger, header *fasthttp.RequestHeader) (map[string]st
 		// check if key gets duplicated
 		// return error containing full key name (with prefix)
 		if _, ok := result[string(clearKey)]; ok {
-			err = fmt.Errorf("key duplication error: %s", string(key))
+			err = fmt.Errorf("header key duplication error: %s", string(key))
 
 			return
 		}
diff --git a/internal/handler/handler.go b/internal/handler/handler.go
index 9ed7f99..ebd0db0 100644
--- a/internal/handler/handler.go
+++ b/internal/handler/handler.go
@@ -215,7 +215,7 @@ func (h *Handler) byNativeAddress(c *fasthttp.RequestCtx, f func(context.Context
 
 	addr := newAddress(bktInfo.CID, *objID)
 
-	f(ctx, *h.newRequest(c, log), addr)
+	f(ctx, *newRequest(c, log), addr)
 }
 
 // byS3Path is a wrapper for function (e.g. request.headObject, request.receiveFile) that
@@ -257,7 +257,7 @@ func (h *Handler) byS3Path(c *fasthttp.RequestCtx, f func(context.Context, reque
 	}
 
 	addr := newAddress(bktInfo.CID, foundOid.OID)
-	f(ctx, *h.newRequest(c, log), addr)
+	f(ctx, *newRequest(c, log), addr)
 }
 
 // byAttribute is a wrapper similar to byNativeAddress.
@@ -319,7 +319,7 @@ func (h *Handler) byAttribute(c *fasthttp.RequestCtx, f func(context.Context, re
 	addrObj.SetContainer(bktInfo.CID)
 	addrObj.SetObject(buf[0])
 
-	f(ctx, *h.newRequest(c, log), addrObj)
+	f(ctx, *newRequest(c, log), addrObj)
 }
 
 // resolveContainer decode container id, if it's not a valid container id
diff --git a/internal/handler/upload.go b/internal/handler/upload.go
index 867025d..49c327b 100644
--- a/internal/handler/upload.go
+++ b/internal/handler/upload.go
@@ -1,13 +1,18 @@
 package handler
 
 import (
+	"archive/tar"
+	"compress/gzip"
 	"context"
 	"encoding/json"
+	"errors"
 	"io"
 	"net/http"
+	"path/filepath"
 	"strconv"
 	"time"
 
+	"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data"
 	"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs"
 	"git.frostfs.info/TrueCloudLab/frostfs-http-gw/response"
 	"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tokens"
@@ -20,8 +25,9 @@ import (
 )
 
 const (
-	jsonHeader   = "application/json; charset=UTF-8"
-	drainBufSize = 4096
+	jsonHeader           = "application/json; charset=UTF-8"
+	drainBufSize         = 4096
+	explodeArchiveHeader = "Explode-Archive"
 )
 
 type putResponse struct {
@@ -44,11 +50,7 @@ func (pr *putResponse) encode(w io.Writer) error {
 
 // Upload handles multipart upload request.
 func (h *Handler) Upload(c *fasthttp.RequestCtx) {
-	var (
-		file  MultipartFile
-		idObj oid.ID
-		addr  oid.Address
-	)
+	var file MultipartFile
 
 	scid, _ := c.UserValue("cid").(string)
 	bodyStream := c.RequestBodyStream()
@@ -72,7 +74,7 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
 		err := file.Close()
 		log.Debug(
 			logs.CloseTemporaryMultipartFormFile,
-			zap.Stringer("address", addr),
+			zap.Stringer("container", bktInfo.CID),
 			zap.String("filename", file.FileName()),
 			zap.Error(err),
 		)
@@ -85,11 +87,51 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
 		return
 	}
 
+	if header := c.Request.Header.Peek(utils.UserAttributeHeaderPrefix + explodeArchiveHeader); header != nil {
+		h.explodeGzip(c, log, bktInfo, file)
+	} else {
+		h.uploadSingleObject(c, log, bktInfo, file)
+	}
+
+	// Multipart is multipart and thus can contain more than one part which
+	// we ignore at the moment. Also, when dealing with chunked encoding
+	// the last zero-length chunk might be left unread (because multipart
+	// reader only cares about its boundary and doesn't look further) and
+	// it will be (erroneously) interpreted as the start of the next
+	// pipelined header. Thus we need to drain the body buffer.
+	for {
+		_, err = bodyStream.Read(drainBuf)
+		if err == io.EOF || errors.Is(err, io.ErrUnexpectedEOF) {
+			break
+		}
+	}
+}
+
+func (h *Handler) uploadSingleObject(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, file MultipartFile) {
+	idObj, err := h.uploadObject(c, log, bktInfo, file.FileName(), file)
+	if err != nil {
+		log.Error(logs.FailedToUploadObject, zap.Error(err))
+		return
+	}
+	addr := newAddress(bktInfo.CID, idObj)
+
+	// Try to return the response, otherwise, if something went wrong, throw an error.
+	if err = newPutResponse(addr).encode(c); err != nil {
+		log.Error(logs.CouldNotEncodeResponse, zap.Error(err))
+		response.Error(c, "could not encode response", fasthttp.StatusBadRequest)
+		return
+	}
+
+	c.Response.Header.SetContentType(jsonHeader)
+}
+
+func (h *Handler) uploadObject(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, fileName string, file io.Reader) (oid.ID, error) {
+	ctx := utils.GetContextFromRequest(c)
 	filtered, err := filterHeaders(log, &c.Request.Header)
 	if err != nil {
-		log.Error(logs.CouldNotProcessHeaders, zap.Error(err))
-		response.Error(c, err.Error(), fasthttp.StatusBadRequest)
-		return
+		log.Error(logs.FailedToFilterHeaders, zap.Error(err))
+		response.Error(c, "could not filter headers", fasthttp.StatusBadRequest)
+		return oid.ID{}, err
 	}
 
 	now := time.Now()
@@ -104,7 +146,7 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
 	if err = utils.PrepareExpirationHeader(c, h.frostfs, filtered, now); err != nil {
 		log.Error(logs.CouldNotPrepareExpirationHeader, zap.Error(err))
 		response.Error(c, "could not prepare expiration header: "+err.Error(), fasthttp.StatusBadRequest)
-		return
+		return oid.ID{}, err
 	}
 
 	attributes := make([]object.Attribute, 0, len(filtered))
@@ -117,10 +159,10 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
 	}
 	// sets FileName attribute if it wasn't set from header
 	if _, ok := filtered[object.AttributeFileName]; !ok {
-		filename := object.NewAttribute()
-		filename.SetKey(object.AttributeFileName)
-		filename.SetValue(file.FileName())
-		attributes = append(attributes, *filename)
+		fileNameAttr := object.NewAttribute()
+		fileNameAttr.SetKey(object.AttributeFileName)
+		fileNameAttr.SetValue(fileName)
+		attributes = append(attributes, *fileNameAttr)
 	}
 	// sets Timestamp attribute if it wasn't set from header and enabled by settings
 	if _, ok := filtered[object.AttributeTimestamp]; !ok && h.config.DefaultTimestamp() {
@@ -146,36 +188,53 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
 		BufferMaxSize: h.config.BufferMaxSizeForPut(),
 	}
 
+	var idObj oid.ID
 	if idObj, err = h.frostfs.CreateObject(ctx, prm); err != nil {
 		h.handlePutFrostFSErr(c, err, log)
+		return oid.ID{}, err
+	}
+	return idObj, nil
+}
+
+// explodeGzip reads files from a tar.gz archive and creates an object for each of them.
+// It sets the FilePath attribute to the file name from the tar.Header.
+func (h *Handler) explodeGzip(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, file io.Reader) {
+	gzipReader, err := gzip.NewReader(file)
+	if err != nil {
+		log.Error(logs.FailedToCreateReader, zap.Error(err))
+		response.Error(c, "could not create gzip reader: "+err.Error(), fasthttp.StatusBadRequest)
 		return
 	}
-
-	addr.SetObject(idObj)
-	addr.SetContainer(bktInfo.CID)
-
-	// Try to return the response, otherwise, if something went wrong, throw an error.
-	if err = newPutResponse(addr).encode(c); err != nil {
-		log.Error(logs.CouldNotEncodeResponse, zap.Error(err))
-		response.Error(c, "could not encode response", fasthttp.StatusBadRequest)
-
-		return
-	}
-	// Multipart is multipart and thus can contain more than one part which
-	// we ignore at the moment. Also, when dealing with chunked encoding
-	// the last zero-length chunk might be left unread (because multipart
-	// reader only cares about its boundary and doesn't look further) and
-	// it will be (erroneously) interpreted as the start of the next
-	// pipelined header. Thus we need to drain the body buffer.
-	for {
-		_, err = bodyStream.Read(drainBuf)
-		if err == io.EOF || err == io.ErrUnexpectedEOF {
-			break
+	defer func() {
+		if err := gzipReader.Close(); err != nil {
+			log.Error(logs.FailedToCloseReader, zap.Error(err))
 		}
+	}()
+
+	tarReader := tar.NewReader(gzipReader)
+	var obj *tar.Header
+
+	for {
+		obj, err = tarReader.Next()
+		if errors.Is(err, io.EOF) {
+			break
+		} else if err != nil {
+			log.Error(logs.FailedToReadFileFromTar, zap.Error(err))
+			response.Error(c, "could not read tar header: "+err.Error(), fasthttp.StatusBadRequest)
+			return
+		}
+		if isDir(obj.Name) {
+			continue
+		}
+		c.Request.Header.Set(utils.UserAttributeHeaderPrefix+object.AttributeFilePath, obj.Name)
+		idObj, err := h.uploadObject(c, log, bktInfo, filepath.Base(obj.Name), tarReader)
+		if err != nil {
+			log.Error(logs.FailedToUploadObject, zap.Error(err))
+			response.Error(c, "could not upload object: "+err.Error(), fasthttp.StatusBadRequest)
+			continue
+		}
+		log.Debug(logs.ObjectUploaded, zap.String("object ID", idObj.EncodeToString()))
 	}
-	// Report status code and content type.
-	c.Response.SetStatusCode(fasthttp.StatusOK)
-	c.Response.Header.SetContentType(jsonHeader)
 }
 
 func (h *Handler) handlePutFrostFSErr(r *fasthttp.RequestCtx, err error, log *zap.Logger) {
diff --git a/internal/handler/upload_test.go b/internal/handler/upload_test.go
new file mode 100644
index 0000000..abeebd1
--- /dev/null
+++ b/internal/handler/upload_test.go
@@ -0,0 +1 @@
+package handler
diff --git a/internal/logs/logs.go b/internal/logs/logs.go
index 4dfa21f..3018bba 100644
--- a/internal/logs/logs.go
+++ b/internal/logs/logs.go
@@ -27,6 +27,12 @@ const (
 	CouldNotProcessHeaders          = "could not process headers"           // Error in ../../uploader/upload.go
 	CouldNotParseClientTime         = "could not parse client time"         // Warn in ../../uploader/upload.go
 	CouldNotPrepareExpirationHeader = "could not prepare expiration header" // Error in ../../uploader/upload.go
+	FailedToCloseReader             = "failed to close reader"              // Error in ../../uploader/upload.go
+	FailedToCreateReader            = "failed to create reader"             // Error in ../../uploader/upload.go
+	FailedToReadFileFromTar         = "failed to read file from tar"        // Error in ../../uploader/upload.go
+	FailedToFilterHeaders           = "failed to filter headers"            // Error in ../../uploader/upload.go
+	FailedToUploadObject            = "failed to upload object"             // Error in ../../uploader/upload.go
+	ObjectUploaded                  = "object uploaded"                     // Debug in ../../uploader/upload.go
 	CouldNotEncodeResponse          = "could not encode response"           // Error in ../../uploader/upload.go
 	CouldNotStoreFileInFrostfs      = "could not store file in frostfs"     // Error in ../../uploader/upload.go
 	AddAttributeToResultObject      = "add attribute to result object"      // Debug in ../../uploader/filter.go
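
A short usage sketch, not part of the patch: it shows how a client could exercise the explode
behaviour end to end by building a tar.gz in memory and sending it as a multipart upload with
the X-Attribute-Explode-Archive header. The gateway address, the /upload/{cid} route, the
multipart field name and the container ID placeholder are illustrative assumptions; only the
header name comes from this change, and the gate checks just that the header is present, not
its value.

package main

import (
	"archive/tar"
	"bytes"
	"compress/gzip"
	"fmt"
	"mime/multipart"
	"net/http"
)

func main() {
	// Build a small tar.gz archive in memory with two regular files.
	var archive bytes.Buffer
	gzw := gzip.NewWriter(&archive)
	tw := tar.NewWriter(gzw)
	files := []struct{ name, body string }{
		{"docs/readme.txt", "hello"},
		{"docs/notes/today.txt", "world"},
	}
	for _, f := range files {
		hdr := &tar.Header{Name: f.name, Mode: 0o644, Size: int64(len(f.body)), Typeflag: tar.TypeReg}
		if err := tw.WriteHeader(hdr); err != nil {
			panic(err)
		}
		if _, err := tw.Write([]byte(f.body)); err != nil {
			panic(err)
		}
	}
	if err := tw.Close(); err != nil {
		panic(err)
	}
	if err := gzw.Close(); err != nil {
		panic(err)
	}

	// Wrap the archive in a multipart form, since Upload reads a multipart body.
	var form bytes.Buffer
	mw := multipart.NewWriter(&form)
	part, err := mw.CreateFormFile("file", "docs.tar.gz") // field name is an assumption
	if err != nil {
		panic(err)
	}
	if _, err := part.Write(archive.Bytes()); err != nil {
		panic(err)
	}
	if err := mw.Close(); err != nil {
		panic(err)
	}

	// POST to the gateway; address, route and container ID are placeholders.
	req, err := http.NewRequest(http.MethodPost, "http://localhost:8080/upload/<container-id>", &form)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", mw.FormDataContentType())
	// Presence of this header makes the gate explode the archive into per-file objects.
	req.Header.Set("X-Attribute-Explode-Archive", "true")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}

With the patch applied, each file in the archive should end up as its own object whose FilePath
attribute is the entry name inside the archive (docs/readme.txt and docs/notes/today.txt here).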