[#170] Support .tar/.tgz unpacking during upload

During upload if X-Explode-Archive is set, gate tries to read archive and create an object for each file.
Each object acquires a FilePath attribute which is calculated relative to the archive root.
Archive could have compression via Gzip if "Content-Encoding: gzip" header is specified

Signed-off-by: Nikita Zinkevich <n.zinkevich@yadro.com>
This commit is contained in:
Nikita Zinkevich 2024-12-06 15:01:16 +03:00
parent 7901d00924
commit 1e7309684b
Signed by: nzinkevich
GPG key ID: 748EA1D0B2E6420A
5 changed files with 185 additions and 92 deletions

View file

@ -257,7 +257,7 @@ func (h *Handler) putObjectToArchive(ctx context.Context, log *zap.Logger, cnrID
}
func (h *Handler) searchObjectsByPrefix(c *fasthttp.RequestCtx, log *zap.Logger, cnrID cid.ID) (ResObjectSearch, error) {
scid := cnrID.EncodeToString()
scid, _ := c.UserValue("cid").(string)
prefix, _ := c.UserValue("prefix").(string)
ctx := utils.GetContextFromRequest(c)

View file

@ -42,7 +42,9 @@ func fetchMultipartFile(l *zap.Logger, r io.Reader, boundary string) (MultipartF
// ignore multipart/form-data values
if filename == "" {
l.Debug(logs.IgnorePartEmptyFilename, zap.String("form", name))
if err = part.Close(); err != nil {
l.Warn(logs.FailedToCloseReader, zap.Error(err))
}
continue
}

View file

@ -1,13 +1,19 @@
package handler
import (
"archive/tar"
"bytes"
"compress/gzip"
"context"
"encoding/json"
"errors"
"io"
"net/http"
"path/filepath"
"strconv"
"time"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tokens"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/utils"
@ -19,8 +25,9 @@ import (
)
const (
jsonHeader = "application/json; charset=UTF-8"
drainBufSize = 4096
jsonHeader = "application/json; charset=UTF-8"
drainBufSize = 4096
explodeArchiveHeader = "X-Explode-Archive"
)
type putResponse struct {
@ -43,11 +50,7 @@ func (pr *putResponse) encode(w io.Writer) error {
// Upload handles multipart upload request.
func (h *Handler) Upload(c *fasthttp.RequestCtx) {
var (
file MultipartFile
idObj oid.ID
addr oid.Address
)
var file MultipartFile
scid, _ := c.UserValue("cid").(string)
bodyStream := c.RequestBodyStream()
@ -63,20 +66,6 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
return
}
defer func() {
// If the temporary reader can be closed - let's close it.
if file == nil {
return
}
err := file.Close()
log.Debug(
logs.CloseTemporaryMultipartFormFile,
zap.Stringer("address", addr),
zap.String("filename", file.FileName()),
zap.Error(err),
)
}()
boundary := string(c.Request.Header.MultipartFormBoundary())
if file, err = fetchMultipartFile(log, bodyStream, boundary); err != nil {
log.Error(logs.CouldNotReceiveMultipartForm, zap.Error(err))
@ -86,53 +75,69 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
filtered, err := filterHeaders(log, &c.Request.Header)
if err != nil {
log.Error(logs.CouldNotProcessHeaders, zap.Error(err))
log.Error(logs.FailedToFilterHeaders, zap.Error(err))
ResponseError(c, err.Error(), fasthttp.StatusBadRequest)
return
}
now := time.Now()
if rawHeader := c.Request.Header.Peek(fasthttp.HeaderDate); rawHeader != nil {
if parsed, err := time.Parse(http.TimeFormat, string(rawHeader)); err != nil {
log.Warn(logs.CouldNotParseClientTime, zap.String("Date header", string(rawHeader)), zap.Error(err))
} else {
now = parsed
}
if c.Request.Header.Peek(explodeArchiveHeader) != nil {
h.explodeArchive(request{c, log}, bktInfo, file, filtered)
} else {
h.uploadSingleObject(request{c, log}, bktInfo, file, filtered)
}
if err = utils.PrepareExpirationHeader(c, h.frostfs, filtered, now); err != nil {
log.Error(logs.CouldNotPrepareExpirationHeader, zap.Error(err))
ResponseError(c, "could not prepare expiration header: "+err.Error(), fasthttp.StatusBadRequest)
// Multipart is multipart and thus can contain more than one part which
// we ignore at the moment. Also, when dealing with chunked encoding
// the last zero-length chunk might be left unread (because multipart
// reader only cares about its boundary and doesn't look further) and
// it will be (erroneously) interpreted as the start of the next
// pipelined header. Thus, we need to drain the body buffer.
for {
_, err = bodyStream.Read(drainBuf)
if err == io.EOF || errors.Is(err, io.ErrUnexpectedEOF) {
break
}
}
}
func (h *Handler) uploadSingleObject(req request, bkt *data.BucketInfo, file MultipartFile, filtered map[string]string) {
c, log := req.RequestCtx, req.log
setIfNotExist(filtered, object.AttributeFileName, file.FileName())
attributes, err := h.extractAttributes(c, log, filtered)
if err != nil {
log.Error(logs.FailedToGetAttributes, zap.Error(err))
ResponseError(c, "could not extract attributes: "+err.Error(), fasthttp.StatusBadRequest)
return
}
attributes := make([]object.Attribute, 0, len(filtered))
// prepares attributes from filtered headers
for key, val := range filtered {
attribute := object.NewAttribute()
attribute.SetKey(key)
attribute.SetValue(val)
attributes = append(attributes, *attribute)
idObj, err := h.uploadObject(c, bkt, attributes, file)
if err != nil {
h.handlePutFrostFSErr(c, err, log)
return
}
// sets FileName attribute if it wasn't set from header
if _, ok := filtered[object.AttributeFileName]; !ok {
filename := object.NewAttribute()
filename.SetKey(object.AttributeFileName)
filename.SetValue(file.FileName())
attributes = append(attributes, *filename)
}
// sets Timestamp attribute if it wasn't set from header and enabled by settings
if _, ok := filtered[object.AttributeTimestamp]; !ok && h.config.DefaultTimestamp() {
timestamp := object.NewAttribute()
timestamp.SetKey(object.AttributeTimestamp)
timestamp.SetValue(strconv.FormatInt(time.Now().Unix(), 10))
attributes = append(attributes, *timestamp)
log.Debug(logs.ObjectUploaded,
zap.String("oid", idObj.EncodeToString()),
zap.String("FileName", file.FileName()),
)
addr := newAddress(bkt.CID, idObj)
c.Response.Header.SetContentType(jsonHeader)
// Try to return the response, otherwise, if something went wrong, throw an error.
if err = newPutResponse(addr).encode(c); err != nil {
log.Error(logs.CouldNotEncodeResponse, zap.Error(err))
ResponseError(c, "could not encode response", fasthttp.StatusBadRequest)
return
}
}
func (h *Handler) uploadObject(c *fasthttp.RequestCtx, bkt *data.BucketInfo, attrs []object.Attribute, file io.Reader) (oid.ID, error) {
ctx := utils.GetContextFromRequest(c)
obj := object.New()
obj.SetContainerID(bktInfo.CID)
obj.SetContainerID(bkt.CID)
obj.SetOwnerID(*h.ownerID)
obj.SetAttributes(attributes...)
obj.SetAttributes(attrs...)
prm := PrmObjectCreate{
PrmAuth: PrmAuth{
@ -141,40 +146,120 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
Object: obj,
Payload: file,
ClientCut: h.config.ClientCut(),
WithoutHomomorphicHash: bktInfo.HomomorphicHashDisabled,
WithoutHomomorphicHash: bkt.HomomorphicHashDisabled,
BufferMaxSize: h.config.BufferMaxSizeForPut(),
}
if idObj, err = h.frostfs.CreateObject(ctx, prm); err != nil {
h.handlePutFrostFSErr(c, err, log)
return
idObj, err := h.frostfs.CreateObject(ctx, prm)
if err != nil {
return oid.ID{}, err
}
addr.SetObject(idObj)
addr.SetContainer(bktInfo.CID)
return idObj, nil
}
// Try to return the response, otherwise, if something went wrong, throw an error.
if err = newPutResponse(addr).encode(c); err != nil {
log.Error(logs.CouldNotEncodeResponse, zap.Error(err))
ResponseError(c, "could not encode response", fasthttp.StatusBadRequest)
return
}
// Multipart is multipart and thus can contain more than one part which
// we ignore at the moment. Also, when dealing with chunked encoding
// the last zero-length chunk might be left unread (because multipart
// reader only cares about its boundary and doesn't look further) and
// it will be (erroneously) interpreted as the start of the next
// pipelined header. Thus we need to drain the body buffer.
for {
_, err = bodyStream.Read(drainBuf)
if err == io.EOF || err == io.ErrUnexpectedEOF {
break
func (h *Handler) extractAttributes(c *fasthttp.RequestCtx, log *zap.Logger, filtered map[string]string) ([]object.Attribute, error) {
now := time.Now()
if rawHeader := c.Request.Header.Peek(fasthttp.HeaderDate); rawHeader != nil {
if parsed, err := time.Parse(http.TimeFormat, string(rawHeader)); err != nil {
log.Warn(logs.CouldNotParseClientTime, zap.String("Date header", string(rawHeader)), zap.Error(err))
} else {
now = parsed
}
}
// Report status code and content type.
c.Response.SetStatusCode(fasthttp.StatusOK)
c.Response.Header.SetContentType(jsonHeader)
if err := utils.PrepareExpirationHeader(c, h.frostfs, filtered, now); err != nil {
log.Error(logs.CouldNotPrepareExpirationHeader, zap.Error(err))
return nil, err
}
attributes := make([]object.Attribute, 0, len(filtered))
// prepares attributes from filtered headers
for key, val := range filtered {
attribute := newAttribute(key, val)
attributes = append(attributes, attribute)
}
// sets Timestamp attribute if it wasn't set from header and enabled by settings
if _, ok := filtered[object.AttributeTimestamp]; !ok && h.config.DefaultTimestamp() {
timestamp := newAttribute(object.AttributeTimestamp, strconv.FormatInt(time.Now().Unix(), 10))
attributes = append(attributes, timestamp)
}
return attributes, nil
}
func newAttribute(key string, val string) object.Attribute {
attr := object.NewAttribute()
attr.SetKey(key)
attr.SetValue(val)
return *attr
}
// explodeArchive read files from archive and creates objects for each of them.
// Sets FilePath attribute with name from tar.Header.
func (h *Handler) explodeArchive(req request, bkt *data.BucketInfo, file io.ReadCloser, filtered map[string]string) {
c, log := req.RequestCtx, req.log
// remove user attributes which vary for each file in archive
// to guarantee that they won't appear twice
delete(filtered, object.AttributeFileName)
delete(filtered, object.AttributeFilePath)
commonAttributes, err := h.extractAttributes(c, log, filtered)
if err != nil {
log.Error(logs.FailedToGetAttributes, zap.Error(err))
ResponseError(c, "could not extract attributes: "+err.Error(), fasthttp.StatusBadRequest)
return
}
attributes := commonAttributes
reader := file
if bytes.EqualFold(c.Request.Header.Peek(fasthttp.HeaderContentEncoding), []byte("gzip")) {
log.Debug(logs.GzipReaderSelected)
gzipReader, err := gzip.NewReader(file)
if err != nil {
log.Error(logs.FailedToCreateGzipReader, zap.Error(err))
ResponseError(c, "could read gzip file: "+err.Error(), fasthttp.StatusBadRequest)
return
}
defer func() {
if err := gzipReader.Close(); err != nil {
log.Warn(logs.FailedToCloseReader, zap.Error(err))
}
}()
reader = gzipReader
}
tarReader := tar.NewReader(reader)
for {
obj, err := tarReader.Next()
if errors.Is(err, io.EOF) {
break
} else if err != nil {
log.Error(logs.FailedToReadFileFromTar, zap.Error(err))
ResponseError(c, "could not get next entry: "+err.Error(), fasthttp.StatusBadRequest)
return
}
if isDir(obj.Name) {
continue
}
// set varying attributes
attributes = attributes[:len(commonAttributes)]
fileName := filepath.Base(obj.Name)
attributes = append(attributes, newAttribute(object.AttributeFilePath, obj.Name))
attributes = append(attributes, newAttribute(object.AttributeFileName, fileName))
idObj, err := h.uploadObject(c, bkt, attributes, tarReader)
if err != nil {
h.handlePutFrostFSErr(c, err, log)
return
}
log.Debug(logs.ObjectUploaded,
zap.String("oid", idObj.EncodeToString()),
zap.String("FileName", fileName),
)
}
}
func (h *Handler) handlePutFrostFSErr(r *fasthttp.RequestCtx, err error, log *zap.Logger) {

View file

@ -101,6 +101,13 @@ func newAddress(cnr cid.ID, obj oid.ID) oid.Address {
return addr
}
// setIfNotExist sets key value to map if key is not present yet.
func setIfNotExist(m map[string]string, key, value string) {
if _, ok := m[key]; !ok {
m[key] = value
}
}
func ResponseError(r *fasthttp.RequestCtx, msg string, code int) {
r.Error(msg+"\n", code)
}

View file

@ -4,8 +4,6 @@ const (
CouldntParseCreationDate = "couldn't parse creation date" // Info in ../../downloader/*
CouldNotDetectContentTypeFromPayload = "could not detect Content-Type from payload" // Error in ../../downloader/download.go
CouldNotReceiveObject = "could not receive object" // Error in ../../downloader/download.go
WrongObjectID = "wrong object id" // Error in ../../downloader/download.go
GetLatestObjectVersion = "get latest object version" // Error in ../../downloader/download.go
ObjectWasDeleted = "object was deleted" // Error in ../../downloader/download.go
CouldNotSearchForObjects = "could not search for objects" // Error in ../../downloader/download.go
ObjectNotFound = "object not found" // Error in ../../downloader/download.go
@ -15,8 +13,6 @@ const (
IteratingOverSelectedObjectsFailed = "iterating over selected objects failed" // Error in ../../downloader/download.go
ObjectsNotFound = "objects not found" // Error in ../../downloader/download.go
CloseZipWriter = "close zip writer" // Error in ../../downloader/download.go
CloseGzipWriter = "close gzip writer" // Error in ../../downloader/download.go
CloseTarWriter = "close tar writer" // Error in ../../downloader/download.go
ServiceIsRunning = "service is running" // Info in ../../metrics/service.go
ServiceCouldntStartOnConfiguredPort = "service couldn't start on configured port" // Warn in ../../metrics/service.go
ServiceHasntStartedSinceItsDisabled = "service hasn't started since it's disabled" // Info in ../../metrics/service.go
@ -25,7 +21,6 @@ const (
CantGracefullyShutDownService = "can't gracefully shut down service, force stop" // Error in ../../metrics/service.go
IgnorePartEmptyFormName = "ignore part, empty form name" // Debug in ../../uploader/upload.go
IgnorePartEmptyFilename = "ignore part, empty filename" // Debug in ../../uploader/upload.go
CloseTemporaryMultipartFormFile = "close temporary multipart/form file" // Debug in ../../uploader/upload.go
CouldNotReceiveMultipartForm = "could not receive multipart/form" // Error in ../../uploader/upload.go
CouldNotParseClientTime = "could not parse client time" // Warn in ../../uploader/upload.go
CouldNotPrepareExpirationHeader = "could not prepare expiration header" // Error in ../../uploader/upload.go
@ -81,11 +76,6 @@ const (
InvalidLifetimeUsingDefaultValue = "invalid lifetime, using default value (in seconds)" // Error in ../../cmd/http-gw/settings.go
InvalidCacheSizeUsingDefaultValue = "invalid cache size, using default value" // Error in ../../cmd/http-gw/settings.go
FailedToUnescapeQuery = "failed to unescape query"
FailedToParseAddressInTreeNode = "failed to parse object addr in tree node"
SettingsNodeInvalidOwnerKey = "settings node: invalid owner key"
SystemNodeHasMultipleIDs = "system node has multiple ids"
FailedToRemoveOldSystemNode = "failed to remove old system node"
BucketSettingsNodeHasMultipleIDs = "bucket settings node has multiple ids"
ServerReconnecting = "reconnecting server..."
ServerReconnectedSuccessfully = "server reconnected successfully"
ServerReconnectFailed = "failed to reconnect server"
@ -96,4 +86,13 @@ const (
MultinetConfigWontBeUpdated = "multinet config won't be updated"
ObjectNotFoundByFilePathTrySearchByFileName = "object not found by filePath attribute, try search by fileName"
CouldntCacheNetmap = "couldn't cache netmap"
FailedToFilterHeaders = "failed to filter headers"
FailedToReadFileFromTar = "failed to read file from tar"
FailedToGetAttributes = "failed to get attributes"
ObjectUploaded = "object uploaded"
CloseGzipWriter = "close gzip writer"
CloseTarWriter = "close tar writer"
FailedToCloseReader = "failed to close reader"
FailedToCreateGzipReader = "failed to create gzip reader"
GzipReaderSelected = "gzip reader selected"
)