From c9050c9dafcf05781a0000e3cdb35a8528a1e2c7 Mon Sep 17 00:00:00 2001 From: Nikita Zinkevich Date: Fri, 6 Dec 2024 15:01:16 +0300 Subject: [PATCH] [#170] Support .tar/.tgz unpacking during upload During upload if "X-Explode-Archive" is set, gate tries to read archive and creates an object for each file. Each object acquires a FilePath attribute which is calculated relative to the archive root. Archive could have compression via Gzip if "Content-Encoding: gzip" header is specified Signed-off-by: Nikita Zinkevich --- internal/handler/download.go | 4 +- internal/handler/filter.go | 2 +- internal/handler/multipart.go | 4 +- internal/handler/upload.go | 235 +++++++++++++++++++++----------- internal/handler/upload_test.go | 1 + internal/logs/logs.go | 9 +- 6 files changed, 170 insertions(+), 85 deletions(-) create mode 100644 internal/handler/upload_test.go diff --git a/internal/handler/download.go b/internal/handler/download.go index aa2f5ec..3d34a30 100644 --- a/internal/handler/download.go +++ b/internal/handler/download.go @@ -265,7 +265,7 @@ func (h *Handler) putObjectToArchive(ctx context.Context, log *zap.Logger, cnrID } func (h *Handler) searchObjectsByPrefix(c *fasthttp.RequestCtx, log *zap.Logger, cnrID cid.ID) (ResObjectSearch, error) { - scid := cnrID.EncodeToString() + scid, _ := c.UserValue("cid").(string) prefix, _ := c.UserValue("prefix").(string) ctx := utils.GetContextFromRequest(c) @@ -297,7 +297,7 @@ func writeToArchive(resGet *Object, objWriter io.Writer, bufZip []byte) error { if err = resGet.Payload.Close(); err != nil { return fmt.Errorf("object body close error: %w", err) } -// TODO: проверить вовзрат кода и zip.Flush и префикс + // TODO: проверить zip.Flush return nil } diff --git a/internal/handler/filter.go b/internal/handler/filter.go index 745718a..9d50d81 100644 --- a/internal/handler/filter.go +++ b/internal/handler/filter.go @@ -39,7 +39,7 @@ func filterHeaders(l *zap.Logger, header *fasthttp.RequestHeader) (map[string]st // check if key gets duplicated // return error containing full key name (with prefix) if _, ok := result[string(clearKey)]; ok { - err = fmt.Errorf("key duplication error: %s", string(key)) + err = fmt.Errorf("header key duplication error: %s", string(key)) return } diff --git a/internal/handler/multipart.go b/internal/handler/multipart.go index 213286c..ebf5edd 100644 --- a/internal/handler/multipart.go +++ b/internal/handler/multipart.go @@ -42,7 +42,9 @@ func fetchMultipartFile(l *zap.Logger, r io.Reader, boundary string) (MultipartF // ignore multipart/form-data values if filename == "" { l.Debug(logs.IgnorePartEmptyFilename, zap.String("form", name)) - + if err = part.Close(); err != nil { + l.Warn(logs.FailedToCloseReader, zap.Error(err)) + } continue } diff --git a/internal/handler/upload.go b/internal/handler/upload.go index 867025d..523af4e 100644 --- a/internal/handler/upload.go +++ b/internal/handler/upload.go @@ -1,13 +1,19 @@ package handler import ( + "archive/tar" + "bytes" + "compress/gzip" "context" "encoding/json" + "errors" "io" "net/http" + "path/filepath" "strconv" "time" + "git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/response" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/tokens" @@ -20,8 +26,9 @@ import ( ) const ( - jsonHeader = "application/json; charset=UTF-8" - drainBufSize = 4096 + jsonHeader = "application/json; charset=UTF-8" + drainBufSize = 4096 + explodeArchiveHeader = "X-Explode-Archive" ) type putResponse struct { @@ -44,11 +51,7 @@ func (pr *putResponse) encode(w io.Writer) error { // Upload handles multipart upload request. func (h *Handler) Upload(c *fasthttp.RequestCtx) { - var ( - file MultipartFile - idObj oid.ID - addr oid.Address - ) + var file MultipartFile scid, _ := c.UserValue("cid").(string) bodyStream := c.RequestBodyStream() @@ -64,20 +67,6 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) { return } - defer func() { - // If the temporary reader can be closed - let's close it. - if file == nil { - return - } - err := file.Close() - log.Debug( - logs.CloseTemporaryMultipartFormFile, - zap.Stringer("address", addr), - zap.String("filename", file.FileName()), - zap.Error(err), - ) - }() - boundary := string(c.Request.Header.MultipartFormBoundary()) if file, err = fetchMultipartFile(log, bodyStream, boundary); err != nil { log.Error(logs.CouldNotReceiveMultipartForm, zap.Error(err)) @@ -85,50 +74,54 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) { return } - filtered, err := filterHeaders(log, &c.Request.Header) - if err != nil { - log.Error(logs.CouldNotProcessHeaders, zap.Error(err)) - response.Error(c, err.Error(), fasthttp.StatusBadRequest) - return + if c.Request.Header.Peek(explodeArchiveHeader) != nil { + h.explodeArchive(c, log, bktInfo, file) + } else { + h.uploadSingleObject(c, log, bktInfo, file) } - now := time.Now() - if rawHeader := c.Request.Header.Peek(fasthttp.HeaderDate); rawHeader != nil { - if parsed, err := time.Parse(http.TimeFormat, string(rawHeader)); err != nil { - log.Warn(logs.CouldNotParseClientTime, zap.String("Date header", string(rawHeader)), zap.Error(err)) - } else { - now = parsed + // Multipart is multipart and thus can contain more than one part which + // we ignore at the moment. Also, when dealing with chunked encoding + // the last zero-length chunk might be left unread (because multipart + // reader only cares about its boundary and doesn't look further) and + // it will be (erroneously) interpreted as the start of the next + // pipelined header. Thus, we need to drain the body buffer. + for { + _, err = bodyStream.Read(drainBuf) + if err == io.EOF || errors.Is(err, io.ErrUnexpectedEOF) { + break } } +} - if err = utils.PrepareExpirationHeader(c, h.frostfs, filtered, now); err != nil { - log.Error(logs.CouldNotPrepareExpirationHeader, zap.Error(err)) - response.Error(c, "could not prepare expiration header: "+err.Error(), fasthttp.StatusBadRequest) +func (h *Handler) uploadSingleObject(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, file MultipartFile) { + attributes, err := h.extractAttributes(c, log, file.FileName()) + if err != nil { + log.Error(logs.FailedToGetAttributes, zap.Error(err)) return } + idObj, err := h.uploadObject(c, bktInfo, attributes, file) + if err != nil { + h.handlePutFrostFSErr(c, err, log) + return + } + log.Debug(logs.ObjectUploaded, + zap.String("oid", idObj.EncodeToString()), + zap.String("FileName", file.FileName()), + ) + addr := newAddress(bktInfo.CID, idObj) - attributes := make([]object.Attribute, 0, len(filtered)) - // prepares attributes from filtered headers - for key, val := range filtered { - attribute := object.NewAttribute() - attribute.SetKey(key) - attribute.SetValue(val) - attributes = append(attributes, *attribute) - } - // sets FileName attribute if it wasn't set from header - if _, ok := filtered[object.AttributeFileName]; !ok { - filename := object.NewAttribute() - filename.SetKey(object.AttributeFileName) - filename.SetValue(file.FileName()) - attributes = append(attributes, *filename) - } - // sets Timestamp attribute if it wasn't set from header and enabled by settings - if _, ok := filtered[object.AttributeTimestamp]; !ok && h.config.DefaultTimestamp() { - timestamp := object.NewAttribute() - timestamp.SetKey(object.AttributeTimestamp) - timestamp.SetValue(strconv.FormatInt(time.Now().Unix(), 10)) - attributes = append(attributes, *timestamp) + c.Response.Header.SetContentType(jsonHeader) + // Try to return the response, otherwise, if something went wrong, throw an error. + if err = newPutResponse(addr).encode(c); err != nil { + log.Error(logs.CouldNotEncodeResponse, zap.Error(err)) + response.Error(c, "could not encode response", fasthttp.StatusBadRequest) + return } +} + +func (h *Handler) uploadObject(c *fasthttp.RequestCtx, bktInfo *data.BucketInfo, attributes []object.Attribute, file io.Reader) (oid.ID, error) { + ctx := utils.GetContextFromRequest(c) obj := object.New() obj.SetContainerID(bktInfo.CID) @@ -146,36 +139,118 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) { BufferMaxSize: h.config.BufferMaxSizeForPut(), } - if idObj, err = h.frostfs.CreateObject(ctx, prm); err != nil { - h.handlePutFrostFSErr(c, err, log) - return + idObj, err := h.frostfs.CreateObject(ctx, prm) + if err != nil { + return oid.ID{}, err } - addr.SetObject(idObj) - addr.SetContainer(bktInfo.CID) + return idObj, nil +} - // Try to return the response, otherwise, if something went wrong, throw an error. - if err = newPutResponse(addr).encode(c); err != nil { - log.Error(logs.CouldNotEncodeResponse, zap.Error(err)) - response.Error(c, "could not encode response", fasthttp.StatusBadRequest) - - return +func (h *Handler) extractAttributes(c *fasthttp.RequestCtx, log *zap.Logger, fileName string) ([]object.Attribute, error) { + filtered, err := filterHeaders(log, &c.Request.Header) + if err != nil { + log.Error(logs.FailedToFilterHeaders, zap.Error(err)) + response.Error(c, "could not filter headers", fasthttp.StatusBadRequest) + return nil, err } - // Multipart is multipart and thus can contain more than one part which - // we ignore at the moment. Also, when dealing with chunked encoding - // the last zero-length chunk might be left unread (because multipart - // reader only cares about its boundary and doesn't look further) and - // it will be (erroneously) interpreted as the start of the next - // pipelined header. Thus we need to drain the body buffer. - for { - _, err = bodyStream.Read(drainBuf) - if err == io.EOF || err == io.ErrUnexpectedEOF { - break + + now := time.Now() + if rawHeader := c.Request.Header.Peek(fasthttp.HeaderDate); rawHeader != nil { + if parsed, err := time.Parse(http.TimeFormat, string(rawHeader)); err != nil { + log.Warn(logs.CouldNotParseClientTime, zap.String("Date header", string(rawHeader)), zap.Error(err)) + } else { + now = parsed } } - // Report status code and content type. - c.Response.SetStatusCode(fasthttp.StatusOK) - c.Response.Header.SetContentType(jsonHeader) + + if err = utils.PrepareExpirationHeader(c, h.frostfs, filtered, now); err != nil { + log.Error(logs.CouldNotPrepareExpirationHeader, zap.Error(err)) + response.Error(c, "could not prepare expiration header: "+err.Error(), fasthttp.StatusBadRequest) + return nil, err + } + + attributes := make([]object.Attribute, 0, len(filtered)) + // prepares attributes from filtered headers + for key, val := range filtered { + attribute := newAttribute(key, val) + attributes = append(attributes, *attribute) + } + // sets FileName attribute if it wasn't set from header + if _, ok := filtered[object.AttributeFileName]; !ok { + fileNameAttr := newAttribute(object.AttributeFileName, fileName) + attributes = append(attributes, *fileNameAttr) + } + // sets Timestamp attribute if it wasn't set from header and enabled by settings + if _, ok := filtered[object.AttributeTimestamp]; !ok && h.config.DefaultTimestamp() { + timestamp := newAttribute(object.AttributeTimestamp, strconv.FormatInt(time.Now().Unix(), 10)) + attributes = append(attributes, *timestamp) + } + + return attributes, nil +} + +func newAttribute(key string, val string) *object.Attribute { + attr := object.NewAttribute() + attr.SetKey(key) + attr.SetValue(val) + return attr +} + +// explodeArchive read files from archive and creates objects for each of them. +// Sets FilePath attribute with name from tar.Header. +func (h *Handler) explodeArchive(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, formFile io.ReadCloser) { + var reader = formFile + if bytes.EqualFold(c.Request.Header.Peek(fasthttp.HeaderContentEncoding), []byte("gzip")) { + log.Debug(logs.GzipReaderSelected) + gzipReader, err := gzip.NewReader(formFile) + if err != nil { + log.Error(logs.FailedToCreateGzipReader, zap.Error(err)) + response.Error(c, "could read gzip file: "+err.Error(), fasthttp.StatusBadRequest) + return + } + defer func() { + if err := gzipReader.Close(); err != nil { + log.Warn(logs.FailedToCloseReader, zap.Error(err)) + } + }() + reader = gzipReader + } + + tarReader := tar.NewReader(reader) + for { + obj, err := tarReader.Next() + if errors.Is(err, io.EOF) { + break + } else if err != nil { + log.Error(logs.FailedToReadFileFromTar, zap.Error(err)) + response.Error(c, "could not get next entry: "+err.Error(), fasthttp.StatusBadRequest) + return + } + if isDir(obj.Name) { + continue + } + + fileName := filepath.Base(obj.Name) + attributes, err := h.extractAttributes(c, log, fileName) + if err != nil { + log.Error(logs.FailedToGetAttributes, zap.Error(err)) + response.Error(c, "could not extract attributes: "+err.Error(), fasthttp.StatusBadRequest) + return + } + // add filepath attribute + attributes = append(attributes, *newAttribute(utils.UserAttributeHeaderPrefix+object.AttributeFilePath, obj.Name)) + + idObj, err := h.uploadObject(c, bktInfo, attributes, tarReader) + if err != nil { + h.handlePutFrostFSErr(c, err, log) + return + } + log.Debug(logs.ObjectUploaded, + zap.String("oid", idObj.EncodeToString()), + zap.String("FileName", fileName), + ) + } } func (h *Handler) handlePutFrostFSErr(r *fasthttp.RequestCtx, err error, log *zap.Logger) { diff --git a/internal/handler/upload_test.go b/internal/handler/upload_test.go new file mode 100644 index 0000000..abeebd1 --- /dev/null +++ b/internal/handler/upload_test.go @@ -0,0 +1 @@ +package handler diff --git a/internal/logs/logs.go b/internal/logs/logs.go index 48d951c..7e949bf 100644 --- a/internal/logs/logs.go +++ b/internal/logs/logs.go @@ -25,7 +25,7 @@ const ( CantGracefullyShutDownService = "can't gracefully shut down service, force stop" // Error in ../../metrics/service.go IgnorePartEmptyFormName = "ignore part, empty form name" // Debug in ../../uploader/upload.go IgnorePartEmptyFilename = "ignore part, empty filename" // Debug in ../../uploader/upload.go - CloseTemporaryMultipartFormFile = "close temporary multipart/form file" // Debug in ../../uploader/upload.go + FailedToCloseTemporaryMultipartFormFile = "failed to close temporary multipart/form file" // Warn in ../../uploader/upload.go CouldNotReceiveMultipartForm = "could not receive multipart/form" // Error in ../../uploader/upload.go CouldNotParseClientTime = "could not parse client time" // Warn in ../../uploader/upload.go CouldNotPrepareExpirationHeader = "could not prepare expiration header" // Error in ../../uploader/upload.go @@ -95,4 +95,11 @@ const ( FailedToLoadMultinetConfig = "failed to load multinet config" MultinetConfigWontBeUpdated = "multinet config won't be updated" ObjectNotFoundByFilePathTrySearchByFileName = "object not found by filePath attribute, try search by fileName" + FailedToFilterHeaders = "failed to filter headers" + FailedToReadFileFromTar = "failed to read file from tar" + FailedToGetAttributes = "failed to get attributes" + ObjectUploaded = "object uploaded" + FailedToCloseReader = "failed to close reader" + FailedToCreateGzipReader = "failed to create gzip reader" + GzipReaderSelected = "gzip reader selected" )