From be3d88ec19f37427314c764523936871d70409d5 Mon Sep 17 00:00:00 2001 From: Nikita Zinkevich Date: Fri, 6 Dec 2024 15:01:16 +0300 Subject: [PATCH] [#170] Support .tar/.tgz unpacking during request During upload if "X-Explode-Archive" is set, gate tries to read archive and creates an object for each file. Each object acquires a FilePath attribute which is calculated relative to the archive root Signed-off-by: Nikita Zinkevich --- internal/handler/filter.go | 2 +- internal/handler/multipart.go | 4 +- internal/handler/upload.go | 184 +++++++++++++++++++++++--------- internal/handler/upload_test.go | 1 + internal/logs/logs.go | 8 +- 5 files changed, 145 insertions(+), 54 deletions(-) create mode 100644 internal/handler/upload_test.go diff --git a/internal/handler/filter.go b/internal/handler/filter.go index 745718a..9d50d81 100644 --- a/internal/handler/filter.go +++ b/internal/handler/filter.go @@ -39,7 +39,7 @@ func filterHeaders(l *zap.Logger, header *fasthttp.RequestHeader) (map[string]st // check if key gets duplicated // return error containing full key name (with prefix) if _, ok := result[string(clearKey)]; ok { - err = fmt.Errorf("key duplication error: %s", string(key)) + err = fmt.Errorf("header key duplication error: %s", string(key)) return } diff --git a/internal/handler/multipart.go b/internal/handler/multipart.go index 213286c..ebf5edd 100644 --- a/internal/handler/multipart.go +++ b/internal/handler/multipart.go @@ -42,7 +42,9 @@ func fetchMultipartFile(l *zap.Logger, r io.Reader, boundary string) (MultipartF // ignore multipart/form-data values if filename == "" { l.Debug(logs.IgnorePartEmptyFilename, zap.String("form", name)) - + if err = part.Close(); err != nil { + l.Warn(logs.FailedToCloseReader, zap.Error(err)) + } continue } diff --git a/internal/handler/upload.go b/internal/handler/upload.go index 867025d..e64bfd5 100644 --- a/internal/handler/upload.go +++ b/internal/handler/upload.go @@ -1,13 +1,19 @@ package handler import ( + "archive/tar" + "bytes" + "compress/gzip" "context" "encoding/json" + "errors" "io" "net/http" + "path/filepath" "strconv" "time" + "git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/response" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/tokens" @@ -20,10 +26,37 @@ import ( ) const ( - jsonHeader = "application/json; charset=UTF-8" - drainBufSize = 4096 + jsonHeader = "application/json; charset=UTF-8" + drainBufSize = 4096 + explodeArchiveHeader = "X-Explode-Archive" ) +type accumulatedReader struct { + reader io.Reader + buffer *bytes.Buffer +} + +func newAccumulatedReader(rc io.Reader) *accumulatedReader { + return &accumulatedReader{ + reader: rc, + buffer: &bytes.Buffer{}, + } +} + +// Read reads data from the underlying io.Reader and accumulates it into the buffer. +func (r *accumulatedReader) Read(p []byte) (int, error) { + n, err := r.reader.Read(p) + if n > 0 { + r.buffer.Write(p[:n]) + } + return n, err +} + +func (r *accumulatedReader) Restore() { + r.reader = io.MultiReader(r.buffer, r.reader) + r.buffer = &bytes.Buffer{} +} + type putResponse struct { ObjectID string `json:"object_id"` ContainerID string `json:"container_id"` @@ -44,11 +77,7 @@ func (pr *putResponse) encode(w io.Writer) error { // Upload handles multipart upload request. func (h *Handler) Upload(c *fasthttp.RequestCtx) { - var ( - file MultipartFile - idObj oid.ID - addr oid.Address - ) + var file MultipartFile scid, _ := c.UserValue("cid").(string) bodyStream := c.RequestBodyStream() @@ -64,32 +93,62 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) { return } - defer func() { - // If the temporary reader can be closed - let's close it. - if file == nil { - return - } - err := file.Close() - log.Debug( - logs.CloseTemporaryMultipartFormFile, - zap.Stringer("address", addr), - zap.String("filename", file.FileName()), - zap.Error(err), - ) - }() - boundary := string(c.Request.Header.MultipartFormBoundary()) if file, err = fetchMultipartFile(log, bodyStream, boundary); err != nil { log.Error(logs.CouldNotReceiveMultipartForm, zap.Error(err)) response.Error(c, "could not receive multipart/form: "+err.Error(), fasthttp.StatusBadRequest) return } + defer func() { + if err := file.Close(); err != nil { + log.Warn(logs.FailedToCloseTemporaryMultipartFormFile, zap.Error(err)) + } + }() + if c.Request.Header.Peek(explodeArchiveHeader) != nil { + h.explodeArchive(c, log, bktInfo, file) + } else { + h.uploadSingleObject(c, log, bktInfo, file) + } + + // Multipart is multipart and thus can contain more than one part which + // we ignore at the moment. Also, when dealing with chunked encoding + // the last zero-length chunk might be left unread (because multipart + // reader only cares about its boundary and doesn't look further) and + // it will be (erroneously) interpreted as the start of the next + // pipelined header. Thus we need to drain the body buffer. + for { + _, err = bodyStream.Read(drainBuf) + if err == io.EOF || errors.Is(err, io.ErrUnexpectedEOF) { + break + } + } +} + +func (h *Handler) uploadSingleObject(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, file MultipartFile) { + idObj, err := h.uploadObject(c, log, bktInfo, file.FileName(), file) + if err != nil { + log.Error(logs.FailedToUploadObject, zap.Error(err)) + return + } + addr := newAddress(bktInfo.CID, idObj) + + c.Response.Header.SetContentType(jsonHeader) + // Try to return the response, otherwise, if something went wrong, throw an error. + if err = newPutResponse(addr).encode(c); err != nil { + log.Error(logs.CouldNotEncodeResponse, zap.Error(err)) + response.Error(c, "could not encode response", fasthttp.StatusBadRequest) + return + } +} + +func (h *Handler) uploadObject(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, fileName string, file io.Reader) (oid.ID, error) { + ctx := utils.GetContextFromRequest(c) filtered, err := filterHeaders(log, &c.Request.Header) if err != nil { - log.Error(logs.CouldNotProcessHeaders, zap.Error(err)) - response.Error(c, err.Error(), fasthttp.StatusBadRequest) - return + log.Error(logs.FailedToFilterHeaders, zap.Error(err)) + response.Error(c, "could not filter headers", fasthttp.StatusBadRequest) + return oid.ID{}, err } now := time.Now() @@ -104,7 +163,7 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) { if err = utils.PrepareExpirationHeader(c, h.frostfs, filtered, now); err != nil { log.Error(logs.CouldNotPrepareExpirationHeader, zap.Error(err)) response.Error(c, "could not prepare expiration header: "+err.Error(), fasthttp.StatusBadRequest) - return + return oid.ID{}, err } attributes := make([]object.Attribute, 0, len(filtered)) @@ -117,10 +176,10 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) { } // sets FileName attribute if it wasn't set from header if _, ok := filtered[object.AttributeFileName]; !ok { - filename := object.NewAttribute() - filename.SetKey(object.AttributeFileName) - filename.SetValue(file.FileName()) - attributes = append(attributes, *filename) + fileNameAttr := object.NewAttribute() + fileNameAttr.SetKey(object.AttributeFileName) + fileNameAttr.SetValue(fileName) + attributes = append(attributes, *fileNameAttr) } // sets Timestamp attribute if it wasn't set from header and enabled by settings if _, ok := filtered[object.AttributeTimestamp]; !ok && h.config.DefaultTimestamp() { @@ -146,36 +205,59 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) { BufferMaxSize: h.config.BufferMaxSizeForPut(), } + var idObj oid.ID if idObj, err = h.frostfs.CreateObject(ctx, prm); err != nil { h.handlePutFrostFSErr(c, err, log) - return + return oid.ID{}, err + } + log.Debug(logs.ObjectUploaded, + zap.String("oid", idObj.EncodeToString()), + zap.String("FileName", fileName), + ) + + return idObj, nil +} + +// explodeArchive read files from archive and creates objects for each of them. +// Sets FilePath attribute with name from tar.Header. +func (h *Handler) explodeArchive(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, formFile io.ReadCloser) { + // default reader - without gzip decompression + accReader := newAccumulatedReader(formFile) + var reader io.Reader + if gzipReader, err := gzip.NewReader(accReader); err == nil { + reader = gzipReader + defer func() { + if err := gzipReader.Close(); err != nil { + log.Warn(logs.FailedToCloseReader, zap.Error(err)) + } + }() + } else { + log.Info(logs.CompressionCheckFailed, zap.Error(err)) + accReader.Restore() + reader = accReader } - addr.SetObject(idObj) - addr.SetContainer(bktInfo.CID) - - // Try to return the response, otherwise, if something went wrong, throw an error. - if err = newPutResponse(addr).encode(c); err != nil { - log.Error(logs.CouldNotEncodeResponse, zap.Error(err)) - response.Error(c, "could not encode response", fasthttp.StatusBadRequest) - - return - } - // Multipart is multipart and thus can contain more than one part which - // we ignore at the moment. Also, when dealing with chunked encoding - // the last zero-length chunk might be left unread (because multipart - // reader only cares about its boundary and doesn't look further) and - // it will be (erroneously) interpreted as the start of the next - // pipelined header. Thus we need to drain the body buffer. + tarReader := tar.NewReader(reader) for { - _, err = bodyStream.Read(drainBuf) - if err == io.EOF || err == io.ErrUnexpectedEOF { + obj, err := tarReader.Next() + if errors.Is(err, io.EOF) { break + } else if err != nil { + log.Error(logs.FailedToReadFileFromTar, zap.Error(err)) + response.Error(c, "could not read tar header: "+err.Error(), fasthttp.StatusBadRequest) + return + } + if isDir(obj.Name) { + continue + } + c.Request.Header.Set(utils.UserAttributeHeaderPrefix+object.AttributeFilePath, obj.Name) + _, err = h.uploadObject(c, log, bktInfo, filepath.Base(obj.Name), tarReader) + if err != nil { + log.Error(logs.FailedToUploadObject, zap.Error(err)) + response.Error(c, "could not upload object: "+err.Error(), fasthttp.StatusBadRequest) + return } } - // Report status code and content type. - c.Response.SetStatusCode(fasthttp.StatusOK) - c.Response.Header.SetContentType(jsonHeader) } func (h *Handler) handlePutFrostFSErr(r *fasthttp.RequestCtx, err error, log *zap.Logger) { diff --git a/internal/handler/upload_test.go b/internal/handler/upload_test.go new file mode 100644 index 0000000..abeebd1 --- /dev/null +++ b/internal/handler/upload_test.go @@ -0,0 +1 @@ +package handler diff --git a/internal/logs/logs.go b/internal/logs/logs.go index 3dc804b..23e5de0 100644 --- a/internal/logs/logs.go +++ b/internal/logs/logs.go @@ -25,7 +25,7 @@ const ( CantGracefullyShutDownService = "can't gracefully shut down service, force stop" // Error in ../../metrics/service.go IgnorePartEmptyFormName = "ignore part, empty form name" // Debug in ../../uploader/upload.go IgnorePartEmptyFilename = "ignore part, empty filename" // Debug in ../../uploader/upload.go - CloseTemporaryMultipartFormFile = "close temporary multipart/form file" // Debug in ../../uploader/upload.go + FailedToCloseTemporaryMultipartFormFile = "failed to close temporary multipart/form file" // Warn in ../../uploader/upload.go CouldNotReceiveMultipartForm = "could not receive multipart/form" // Error in ../../uploader/upload.go CouldNotParseClientTime = "could not parse client time" // Warn in ../../uploader/upload.go CouldNotPrepareExpirationHeader = "could not prepare expiration header" // Error in ../../uploader/upload.go @@ -89,4 +89,10 @@ const ( MultinetDialFail = "multinet dial failed" FailedToLoadMultinetConfig = "failed to load multinet config" MultinetConfigWontBeUpdated = "multinet config won't be updated" + FailedToFilterHeaders = "failed to filter headers" + FailedToReadFileFromTar = "failed to read file from tar" + FailedToUploadObject = "failed to upload object" + ObjectUploaded = "object uploaded" + FailedToCloseReader = "failed to close reader" + CompressionCheckFailed = "compression check failed" )