[#170] Support tar.gz exploding
During upload, if the "X-Attribute-Explode-Archive" header is set, the gate tries to read the tar.gz archive and creates an object for each file. Each object acquires a FilePath attribute, which is calculated relative to the archive root. Signed-off-by: Nikita Zinkevich <n.zinkevich@yadro.com>
This commit is contained in:
parent
e81f01c2ab
commit
69ad977577
6 changed files with 109 additions and 45 deletions
|
@ -36,7 +36,7 @@ func (h *Handler) DownloadByAddressOrBucketName(c *fasthttp.RequestCtx) {
|
|||
}
|
||||
}
|
||||
|
||||
func (h *Handler) newRequest(ctx *fasthttp.RequestCtx, log *zap.Logger) *request {
|
||||
func newRequest(ctx *fasthttp.RequestCtx, log *zap.Logger) *request {
|
||||
return &request{
|
||||
RequestCtx: ctx,
|
||||
log: log,
|
||||
|
|
|
@ -39,7 +39,7 @@ func filterHeaders(l *zap.Logger, header *fasthttp.RequestHeader) (map[string]st
|
|||
// check if key gets duplicated
|
||||
// return error containing full key name (with prefix)
|
||||
if _, ok := result[string(clearKey)]; ok {
|
||||
err = fmt.Errorf("key duplication error: %s", string(key))
|
||||
err = fmt.Errorf("header key duplication error: %s", string(key))
|
||||
return
|
||||
}
|
||||
|
||||
|
|
|
@ -215,7 +215,7 @@ func (h *Handler) byNativeAddress(c *fasthttp.RequestCtx, f func(context.Context
|
|||
|
||||
addr := newAddress(bktInfo.CID, *objID)
|
||||
|
||||
f(ctx, *h.newRequest(c, log), addr)
|
||||
f(ctx, *newRequest(c, log), addr)
|
||||
}
|
||||
|
||||
// byS3Path is a wrapper for function (e.g. request.headObject, request.receiveFile) that
|
||||
|
@ -257,7 +257,7 @@ func (h *Handler) byS3Path(c *fasthttp.RequestCtx, f func(context.Context, reque
|
|||
}
|
||||
addr := newAddress(bktInfo.CID, foundOid.OID)
|
||||
|
||||
f(ctx, *h.newRequest(c, log), addr)
|
||||
f(ctx, *newRequest(c, log), addr)
|
||||
}
|
||||
|
||||
// byAttribute is a wrapper similar to byNativeAddress.
|
||||
|
@ -319,7 +319,7 @@ func (h *Handler) byAttribute(c *fasthttp.RequestCtx, f func(context.Context, re
|
|||
addrObj.SetContainer(bktInfo.CID)
|
||||
addrObj.SetObject(buf[0])
|
||||
|
||||
f(ctx, *h.newRequest(c, log), addrObj)
|
||||
f(ctx, *newRequest(c, log), addrObj)
|
||||
}
|
||||
|
||||
// resolveContainer decodes container id, if it's not a valid container id
|
||||
|
|
|
@ -1,13 +1,18 @@
|
|||
package handler
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/response"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tokens"
|
||||
|
@ -20,8 +25,9 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
jsonHeader = "application/json; charset=UTF-8"
|
||||
drainBufSize = 4096
|
||||
jsonHeader = "application/json; charset=UTF-8"
|
||||
drainBufSize = 4096
|
||||
explodeArchiveHeader = "Explode-Archive"
|
||||
)
|
||||
|
||||
type putResponse struct {
|
||||
|
@ -44,11 +50,7 @@ func (pr *putResponse) encode(w io.Writer) error {
|
|||
|
||||
// Upload handles multipart upload request.
|
||||
func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
||||
var (
|
||||
file MultipartFile
|
||||
idObj oid.ID
|
||||
addr oid.Address
|
||||
)
|
||||
var file MultipartFile
|
||||
|
||||
scid, _ := c.UserValue("cid").(string)
|
||||
bodyStream := c.RequestBodyStream()
|
||||
|
@ -72,7 +74,7 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
|||
err := file.Close()
|
||||
log.Debug(
|
||||
logs.CloseTemporaryMultipartFormFile,
|
||||
zap.Stringer("address", addr),
|
||||
zap.Stringer("container", bktInfo.CID),
|
||||
zap.String("filename", file.FileName()),
|
||||
zap.Error(err),
|
||||
)
|
||||
|
@ -85,11 +87,51 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
|||
return
|
||||
}
|
||||
|
||||
if header := c.Request.Header.Peek(utils.UserAttributeHeaderPrefix + explodeArchiveHeader); header != nil {
|
||||
h.explodeGzip(c, log, bktInfo, file)
|
||||
} else {
|
||||
h.uploadSingleObject(c, log, bktInfo, file)
|
||||
}
|
||||
|
||||
// Multipart is multipart and thus can contain more than one part which
|
||||
// we ignore at the moment. Also, when dealing with chunked encoding
|
||||
// the last zero-length chunk might be left unread (because multipart
|
||||
// reader only cares about its boundary and doesn't look further) and
|
||||
// it will be (erroneously) interpreted as the start of the next
|
||||
// pipelined header. Thus we need to drain the body buffer.
|
||||
for {
|
||||
_, err = bodyStream.Read(drainBuf)
|
||||
if err == io.EOF || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *Handler) uploadSingleObject(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, file MultipartFile) {
|
||||
idObj, err := h.uploadObject(c, log, bktInfo, file.FileName(), file)
|
||||
if err != nil {
|
||||
log.Error(logs.FailedToUploadObject, zap.Error(err))
|
||||
return
|
||||
}
|
||||
addr := newAddress(bktInfo.CID, idObj)
|
||||
|
||||
// Try to return the response, otherwise, if something went wrong, throw an error.
|
||||
if err = newPutResponse(addr).encode(c); err != nil {
|
||||
log.Error(logs.CouldNotEncodeResponse, zap.Error(err))
|
||||
response.Error(c, "could not encode response", fasthttp.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
c.Response.Header.SetContentType(jsonHeader)
|
||||
}
|
||||
|
||||
func (h *Handler) uploadObject(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, fileName string, file io.Reader) (oid.ID, error) {
|
||||
ctx := utils.GetContextFromRequest(c)
|
||||
filtered, err := filterHeaders(log, &c.Request.Header)
|
||||
if err != nil {
|
||||
log.Error(logs.CouldNotProcessHeaders, zap.Error(err))
|
||||
response.Error(c, err.Error(), fasthttp.StatusBadRequest)
|
||||
return
|
||||
log.Error(logs.FailedToFilterHeaders, zap.Error(err))
|
||||
response.Error(c, "could not filter headers", fasthttp.StatusBadRequest)
|
||||
return oid.ID{}, err
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
|
@ -104,7 +146,7 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
|||
if err = utils.PrepareExpirationHeader(c, h.frostfs, filtered, now); err != nil {
|
||||
log.Error(logs.CouldNotPrepareExpirationHeader, zap.Error(err))
|
||||
response.Error(c, "could not prepare expiration header: "+err.Error(), fasthttp.StatusBadRequest)
|
||||
return
|
||||
return oid.ID{}, err
|
||||
}
|
||||
|
||||
attributes := make([]object.Attribute, 0, len(filtered))
|
||||
|
@ -117,10 +159,10 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
|||
}
|
||||
// sets FileName attribute if it wasn't set from header
|
||||
if _, ok := filtered[object.AttributeFileName]; !ok {
|
||||
filename := object.NewAttribute()
|
||||
filename.SetKey(object.AttributeFileName)
|
||||
filename.SetValue(file.FileName())
|
||||
attributes = append(attributes, *filename)
|
||||
fileNameAttr := object.NewAttribute()
|
||||
fileNameAttr.SetKey(object.AttributeFileName)
|
||||
fileNameAttr.SetValue(fileName)
|
||||
attributes = append(attributes, *fileNameAttr)
|
||||
}
|
||||
// sets Timestamp attribute if it wasn't set from header and enabled by settings
|
||||
if _, ok := filtered[object.AttributeTimestamp]; !ok && h.config.DefaultTimestamp() {
|
||||
|
@ -146,36 +188,51 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
|||
BufferMaxSize: h.config.BufferMaxSizeForPut(),
|
||||
}
|
||||
|
||||
var idObj oid.ID
|
||||
if idObj, err = h.frostfs.CreateObject(ctx, prm); err != nil {
|
||||
h.handlePutFrostFSErr(c, err, log)
|
||||
return oid.ID{}, err
|
||||
}
|
||||
return idObj, nil
|
||||
}
|
||||
|
||||
// explodeGzip reads files from a tar.gz archive and creates an object for each of them.
|
||||
// Sets FilePath attribute with name from tar.Header.
|
||||
func (h *Handler) explodeGzip(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, file io.Reader) {
|
||||
gzipReader, err := gzip.NewReader(file)
|
||||
if err != nil {
|
||||
log.Error(logs.FailedToCreateReader, zap.Error(err))
|
||||
response.Error(c, "could not create gzip reader: "+err.Error(), fasthttp.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
addr.SetObject(idObj)
|
||||
addr.SetContainer(bktInfo.CID)
|
||||
|
||||
// Try to return the response, otherwise, if something went wrong, throw an error.
|
||||
if err = newPutResponse(addr).encode(c); err != nil {
|
||||
log.Error(logs.CouldNotEncodeResponse, zap.Error(err))
|
||||
response.Error(c, "could not encode response", fasthttp.StatusBadRequest)
|
||||
|
||||
return
|
||||
}
|
||||
// Multipart is multipart and thus can contain more than one part which
|
||||
// we ignore at the moment. Also, when dealing with chunked encoding
|
||||
// the last zero-length chunk might be left unread (because multipart
|
||||
// reader only cares about its boundary and doesn't look further) and
|
||||
// it will be (erroneously) interpreted as the start of the next
|
||||
// pipelined header. Thus we need to drain the body buffer.
|
||||
for {
|
||||
_, err = bodyStream.Read(drainBuf)
|
||||
if err == io.EOF || err == io.ErrUnexpectedEOF {
|
||||
break
|
||||
defer func() {
|
||||
if err := gzipReader.Close(); err != nil {
|
||||
log.Error(logs.FailedToCloseReader, zap.Error(err))
|
||||
}
|
||||
}()
|
||||
|
||||
tarReader := tar.NewReader(gzipReader)
|
||||
var obj *tar.Header
|
||||
|
||||
for {
|
||||
obj, err = tarReader.Next()
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
} else if err != nil {
|
||||
log.Error(logs.FailedToReadFileFromTar, zap.Error(err))
|
||||
continue
|
||||
}
|
||||
if isDir(obj.Name) {
|
||||
continue
|
||||
}
|
||||
c.Request.Header.Set(utils.UserAttributeHeaderPrefix+object.AttributeFilePath, obj.Name)
|
||||
idObj, err := h.uploadObject(c, log, bktInfo, filepath.Base(obj.Name), tarReader)
|
||||
if err != nil {
|
||||
log.Error(logs.FailedToUploadObject, zap.Error(err))
|
||||
response.Error(c, "could not upload object: "+err.Error(), fasthttp.StatusBadRequest)
|
||||
}
|
||||
log.Debug(logs.ObjectUploaded, zap.String("object ID", idObj.EncodeToString()))
|
||||
}
|
||||
// Report status code and content type.
|
||||
c.Response.SetStatusCode(fasthttp.StatusOK)
|
||||
c.Response.Header.SetContentType(jsonHeader)
|
||||
}
|
||||
|
||||
func (h *Handler) handlePutFrostFSErr(r *fasthttp.RequestCtx, err error, log *zap.Logger) {
|
||||
|
|
1
internal/handler/upload_test.go
Normal file
1
internal/handler/upload_test.go
Normal file
|
@ -0,0 +1 @@
|
|||
package handler
|
|
@ -27,6 +27,12 @@ const (
|
|||
CouldNotProcessHeaders = "could not process headers" // Error in ../../uploader/upload.go
|
||||
CouldNotParseClientTime = "could not parse client time" // Warn in ../../uploader/upload.go
|
||||
CouldNotPrepareExpirationHeader = "could not prepare expiration header" // Error in ../../uploader/upload.go
|
||||
FailedToCloseReader = "failed to close reader" // Error in ../../uploader/upload.go
|
||||
FailedToCreateReader = "failed to create reader" // Error in ../../uploader/upload.go
|
||||
FailedToReadFileFromTar = "failed to read file from tar" // Error in ../../uploader/upload.go
|
||||
FailedToFilterHeaders = "failed to filter headers" // Error in ../../uploader/upload.go
|
||||
FailedToUploadObject = "failed to upload object" // Error in ../../uploader/upload.go
|
||||
ObjectUploaded = "object uploaded" // Debug in ../../uploader/upload.go
|
||||
CouldNotEncodeResponse = "could not encode response" // Error in ../../uploader/upload.go
|
||||
CouldNotStoreFileInFrostfs = "could not store file in frostfs" // Error in ../../uploader/upload.go
|
||||
AddAttributeToResultObject = "add attribute to result object" // Debug in ../../uploader/filter.go
|
||||
|
|
Loading…
Reference in a new issue