[#170] Support .tar/.tgz unpacking during upload
During upload if "X-Explode-Archive" is set, gate tries to read archive and creates an object for each file. Each object acquires a FilePath attribute which is calculated relative to the archive root. Archive could have compression via Gzip if "Content-Encoding: gzip" header is specified Signed-off-by: Nikita Zinkevich <n.zinkevich@yadro.com>
This commit is contained in:
parent
510727512c
commit
c9050c9daf
6 changed files with 170 additions and 85 deletions
|
@ -265,7 +265,7 @@ func (h *Handler) putObjectToArchive(ctx context.Context, log *zap.Logger, cnrID
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *Handler) searchObjectsByPrefix(c *fasthttp.RequestCtx, log *zap.Logger, cnrID cid.ID) (ResObjectSearch, error) {
|
func (h *Handler) searchObjectsByPrefix(c *fasthttp.RequestCtx, log *zap.Logger, cnrID cid.ID) (ResObjectSearch, error) {
|
||||||
scid := cnrID.EncodeToString()
|
scid, _ := c.UserValue("cid").(string)
|
||||||
prefix, _ := c.UserValue("prefix").(string)
|
prefix, _ := c.UserValue("prefix").(string)
|
||||||
|
|
||||||
ctx := utils.GetContextFromRequest(c)
|
ctx := utils.GetContextFromRequest(c)
|
||||||
|
@ -297,7 +297,7 @@ func writeToArchive(resGet *Object, objWriter io.Writer, bufZip []byte) error {
|
||||||
if err = resGet.Payload.Close(); err != nil {
|
if err = resGet.Payload.Close(); err != nil {
|
||||||
return fmt.Errorf("object body close error: %w", err)
|
return fmt.Errorf("object body close error: %w", err)
|
||||||
}
|
}
|
||||||
// TODO: проверить вовзрат кода и zip.Flush и префикс
|
// TODO: проверить zip.Flush
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ func filterHeaders(l *zap.Logger, header *fasthttp.RequestHeader) (map[string]st
|
||||||
// check if key gets duplicated
|
// check if key gets duplicated
|
||||||
// return error containing full key name (with prefix)
|
// return error containing full key name (with prefix)
|
||||||
if _, ok := result[string(clearKey)]; ok {
|
if _, ok := result[string(clearKey)]; ok {
|
||||||
err = fmt.Errorf("key duplication error: %s", string(key))
|
err = fmt.Errorf("header key duplication error: %s", string(key))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,9 @@ func fetchMultipartFile(l *zap.Logger, r io.Reader, boundary string) (MultipartF
|
||||||
// ignore multipart/form-data values
|
// ignore multipart/form-data values
|
||||||
if filename == "" {
|
if filename == "" {
|
||||||
l.Debug(logs.IgnorePartEmptyFilename, zap.String("form", name))
|
l.Debug(logs.IgnorePartEmptyFilename, zap.String("form", name))
|
||||||
|
if err = part.Close(); err != nil {
|
||||||
|
l.Warn(logs.FailedToCloseReader, zap.Error(err))
|
||||||
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,13 +1,19 @@
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"archive/tar"
|
||||||
|
"bytes"
|
||||||
|
"compress/gzip"
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs"
|
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/response"
|
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/response"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tokens"
|
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tokens"
|
||||||
|
@ -22,6 +28,7 @@ import (
|
||||||
const (
|
const (
|
||||||
jsonHeader = "application/json; charset=UTF-8"
|
jsonHeader = "application/json; charset=UTF-8"
|
||||||
drainBufSize = 4096
|
drainBufSize = 4096
|
||||||
|
explodeArchiveHeader = "X-Explode-Archive"
|
||||||
)
|
)
|
||||||
|
|
||||||
type putResponse struct {
|
type putResponse struct {
|
||||||
|
@ -44,11 +51,7 @@ func (pr *putResponse) encode(w io.Writer) error {
|
||||||
|
|
||||||
// Upload handles multipart upload request.
|
// Upload handles multipart upload request.
|
||||||
func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
||||||
var (
|
var file MultipartFile
|
||||||
file MultipartFile
|
|
||||||
idObj oid.ID
|
|
||||||
addr oid.Address
|
|
||||||
)
|
|
||||||
|
|
||||||
scid, _ := c.UserValue("cid").(string)
|
scid, _ := c.UserValue("cid").(string)
|
||||||
bodyStream := c.RequestBodyStream()
|
bodyStream := c.RequestBodyStream()
|
||||||
|
@ -64,20 +67,6 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
defer func() {
|
|
||||||
// If the temporary reader can be closed - let's close it.
|
|
||||||
if file == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
err := file.Close()
|
|
||||||
log.Debug(
|
|
||||||
logs.CloseTemporaryMultipartFormFile,
|
|
||||||
zap.Stringer("address", addr),
|
|
||||||
zap.String("filename", file.FileName()),
|
|
||||||
zap.Error(err),
|
|
||||||
)
|
|
||||||
}()
|
|
||||||
|
|
||||||
boundary := string(c.Request.Header.MultipartFormBoundary())
|
boundary := string(c.Request.Header.MultipartFormBoundary())
|
||||||
if file, err = fetchMultipartFile(log, bodyStream, boundary); err != nil {
|
if file, err = fetchMultipartFile(log, bodyStream, boundary); err != nil {
|
||||||
log.Error(logs.CouldNotReceiveMultipartForm, zap.Error(err))
|
log.Error(logs.CouldNotReceiveMultipartForm, zap.Error(err))
|
||||||
|
@ -85,50 +74,54 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
filtered, err := filterHeaders(log, &c.Request.Header)
|
if c.Request.Header.Peek(explodeArchiveHeader) != nil {
|
||||||
if err != nil {
|
h.explodeArchive(c, log, bktInfo, file)
|
||||||
log.Error(logs.CouldNotProcessHeaders, zap.Error(err))
|
|
||||||
response.Error(c, err.Error(), fasthttp.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
now := time.Now()
|
|
||||||
if rawHeader := c.Request.Header.Peek(fasthttp.HeaderDate); rawHeader != nil {
|
|
||||||
if parsed, err := time.Parse(http.TimeFormat, string(rawHeader)); err != nil {
|
|
||||||
log.Warn(logs.CouldNotParseClientTime, zap.String("Date header", string(rawHeader)), zap.Error(err))
|
|
||||||
} else {
|
} else {
|
||||||
now = parsed
|
h.uploadSingleObject(c, log, bktInfo, file)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = utils.PrepareExpirationHeader(c, h.frostfs, filtered, now); err != nil {
|
// Multipart is multipart and thus can contain more than one part which
|
||||||
log.Error(logs.CouldNotPrepareExpirationHeader, zap.Error(err))
|
// we ignore at the moment. Also, when dealing with chunked encoding
|
||||||
response.Error(c, "could not prepare expiration header: "+err.Error(), fasthttp.StatusBadRequest)
|
// the last zero-length chunk might be left unread (because multipart
|
||||||
|
// reader only cares about its boundary and doesn't look further) and
|
||||||
|
// it will be (erroneously) interpreted as the start of the next
|
||||||
|
// pipelined header. Thus, we need to drain the body buffer.
|
||||||
|
for {
|
||||||
|
_, err = bodyStream.Read(drainBuf)
|
||||||
|
if err == io.EOF || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *Handler) uploadSingleObject(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, file MultipartFile) {
|
||||||
|
attributes, err := h.extractAttributes(c, log, file.FileName())
|
||||||
|
if err != nil {
|
||||||
|
log.Error(logs.FailedToGetAttributes, zap.Error(err))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
idObj, err := h.uploadObject(c, bktInfo, attributes, file)
|
||||||
|
if err != nil {
|
||||||
|
h.handlePutFrostFSErr(c, err, log)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Debug(logs.ObjectUploaded,
|
||||||
|
zap.String("oid", idObj.EncodeToString()),
|
||||||
|
zap.String("FileName", file.FileName()),
|
||||||
|
)
|
||||||
|
addr := newAddress(bktInfo.CID, idObj)
|
||||||
|
|
||||||
attributes := make([]object.Attribute, 0, len(filtered))
|
c.Response.Header.SetContentType(jsonHeader)
|
||||||
// prepares attributes from filtered headers
|
// Try to return the response, otherwise, if something went wrong, throw an error.
|
||||||
for key, val := range filtered {
|
if err = newPutResponse(addr).encode(c); err != nil {
|
||||||
attribute := object.NewAttribute()
|
log.Error(logs.CouldNotEncodeResponse, zap.Error(err))
|
||||||
attribute.SetKey(key)
|
response.Error(c, "could not encode response", fasthttp.StatusBadRequest)
|
||||||
attribute.SetValue(val)
|
return
|
||||||
attributes = append(attributes, *attribute)
|
|
||||||
}
|
|
||||||
// sets FileName attribute if it wasn't set from header
|
|
||||||
if _, ok := filtered[object.AttributeFileName]; !ok {
|
|
||||||
filename := object.NewAttribute()
|
|
||||||
filename.SetKey(object.AttributeFileName)
|
|
||||||
filename.SetValue(file.FileName())
|
|
||||||
attributes = append(attributes, *filename)
|
|
||||||
}
|
|
||||||
// sets Timestamp attribute if it wasn't set from header and enabled by settings
|
|
||||||
if _, ok := filtered[object.AttributeTimestamp]; !ok && h.config.DefaultTimestamp() {
|
|
||||||
timestamp := object.NewAttribute()
|
|
||||||
timestamp.SetKey(object.AttributeTimestamp)
|
|
||||||
timestamp.SetValue(strconv.FormatInt(time.Now().Unix(), 10))
|
|
||||||
attributes = append(attributes, *timestamp)
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *Handler) uploadObject(c *fasthttp.RequestCtx, bktInfo *data.BucketInfo, attributes []object.Attribute, file io.Reader) (oid.ID, error) {
|
||||||
|
ctx := utils.GetContextFromRequest(c)
|
||||||
|
|
||||||
obj := object.New()
|
obj := object.New()
|
||||||
obj.SetContainerID(bktInfo.CID)
|
obj.SetContainerID(bktInfo.CID)
|
||||||
|
@ -146,36 +139,118 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
||||||
BufferMaxSize: h.config.BufferMaxSizeForPut(),
|
BufferMaxSize: h.config.BufferMaxSizeForPut(),
|
||||||
}
|
}
|
||||||
|
|
||||||
if idObj, err = h.frostfs.CreateObject(ctx, prm); err != nil {
|
idObj, err := h.frostfs.CreateObject(ctx, prm)
|
||||||
|
if err != nil {
|
||||||
|
return oid.ID{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return idObj, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *Handler) extractAttributes(c *fasthttp.RequestCtx, log *zap.Logger, fileName string) ([]object.Attribute, error) {
|
||||||
|
filtered, err := filterHeaders(log, &c.Request.Header)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(logs.FailedToFilterHeaders, zap.Error(err))
|
||||||
|
response.Error(c, "could not filter headers", fasthttp.StatusBadRequest)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
if rawHeader := c.Request.Header.Peek(fasthttp.HeaderDate); rawHeader != nil {
|
||||||
|
if parsed, err := time.Parse(http.TimeFormat, string(rawHeader)); err != nil {
|
||||||
|
log.Warn(logs.CouldNotParseClientTime, zap.String("Date header", string(rawHeader)), zap.Error(err))
|
||||||
|
} else {
|
||||||
|
now = parsed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = utils.PrepareExpirationHeader(c, h.frostfs, filtered, now); err != nil {
|
||||||
|
log.Error(logs.CouldNotPrepareExpirationHeader, zap.Error(err))
|
||||||
|
response.Error(c, "could not prepare expiration header: "+err.Error(), fasthttp.StatusBadRequest)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes := make([]object.Attribute, 0, len(filtered))
|
||||||
|
// prepares attributes from filtered headers
|
||||||
|
for key, val := range filtered {
|
||||||
|
attribute := newAttribute(key, val)
|
||||||
|
attributes = append(attributes, *attribute)
|
||||||
|
}
|
||||||
|
// sets FileName attribute if it wasn't set from header
|
||||||
|
if _, ok := filtered[object.AttributeFileName]; !ok {
|
||||||
|
fileNameAttr := newAttribute(object.AttributeFileName, fileName)
|
||||||
|
attributes = append(attributes, *fileNameAttr)
|
||||||
|
}
|
||||||
|
// sets Timestamp attribute if it wasn't set from header and enabled by settings
|
||||||
|
if _, ok := filtered[object.AttributeTimestamp]; !ok && h.config.DefaultTimestamp() {
|
||||||
|
timestamp := newAttribute(object.AttributeTimestamp, strconv.FormatInt(time.Now().Unix(), 10))
|
||||||
|
attributes = append(attributes, *timestamp)
|
||||||
|
}
|
||||||
|
|
||||||
|
return attributes, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func newAttribute(key string, val string) *object.Attribute {
|
||||||
|
attr := object.NewAttribute()
|
||||||
|
attr.SetKey(key)
|
||||||
|
attr.SetValue(val)
|
||||||
|
return attr
|
||||||
|
}
|
||||||
|
|
||||||
|
// explodeArchive read files from archive and creates objects for each of them.
|
||||||
|
// Sets FilePath attribute with name from tar.Header.
|
||||||
|
func (h *Handler) explodeArchive(c *fasthttp.RequestCtx, log *zap.Logger, bktInfo *data.BucketInfo, formFile io.ReadCloser) {
|
||||||
|
var reader = formFile
|
||||||
|
if bytes.EqualFold(c.Request.Header.Peek(fasthttp.HeaderContentEncoding), []byte("gzip")) {
|
||||||
|
log.Debug(logs.GzipReaderSelected)
|
||||||
|
gzipReader, err := gzip.NewReader(formFile)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(logs.FailedToCreateGzipReader, zap.Error(err))
|
||||||
|
response.Error(c, "could read gzip file: "+err.Error(), fasthttp.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if err := gzipReader.Close(); err != nil {
|
||||||
|
log.Warn(logs.FailedToCloseReader, zap.Error(err))
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
reader = gzipReader
|
||||||
|
}
|
||||||
|
|
||||||
|
tarReader := tar.NewReader(reader)
|
||||||
|
for {
|
||||||
|
obj, err := tarReader.Next()
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
break
|
||||||
|
} else if err != nil {
|
||||||
|
log.Error(logs.FailedToReadFileFromTar, zap.Error(err))
|
||||||
|
response.Error(c, "could not get next entry: "+err.Error(), fasthttp.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if isDir(obj.Name) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
fileName := filepath.Base(obj.Name)
|
||||||
|
attributes, err := h.extractAttributes(c, log, fileName)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(logs.FailedToGetAttributes, zap.Error(err))
|
||||||
|
response.Error(c, "could not extract attributes: "+err.Error(), fasthttp.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// add filepath attribute
|
||||||
|
attributes = append(attributes, *newAttribute(utils.UserAttributeHeaderPrefix+object.AttributeFilePath, obj.Name))
|
||||||
|
|
||||||
|
idObj, err := h.uploadObject(c, bktInfo, attributes, tarReader)
|
||||||
|
if err != nil {
|
||||||
h.handlePutFrostFSErr(c, err, log)
|
h.handlePutFrostFSErr(c, err, log)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
log.Debug(logs.ObjectUploaded,
|
||||||
addr.SetObject(idObj)
|
zap.String("oid", idObj.EncodeToString()),
|
||||||
addr.SetContainer(bktInfo.CID)
|
zap.String("FileName", fileName),
|
||||||
|
)
|
||||||
// Try to return the response, otherwise, if something went wrong, throw an error.
|
|
||||||
if err = newPutResponse(addr).encode(c); err != nil {
|
|
||||||
log.Error(logs.CouldNotEncodeResponse, zap.Error(err))
|
|
||||||
response.Error(c, "could not encode response", fasthttp.StatusBadRequest)
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
// Multipart is multipart and thus can contain more than one part which
|
|
||||||
// we ignore at the moment. Also, when dealing with chunked encoding
|
|
||||||
// the last zero-length chunk might be left unread (because multipart
|
|
||||||
// reader only cares about its boundary and doesn't look further) and
|
|
||||||
// it will be (erroneously) interpreted as the start of the next
|
|
||||||
// pipelined header. Thus we need to drain the body buffer.
|
|
||||||
for {
|
|
||||||
_, err = bodyStream.Read(drainBuf)
|
|
||||||
if err == io.EOF || err == io.ErrUnexpectedEOF {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Report status code and content type.
|
|
||||||
c.Response.SetStatusCode(fasthttp.StatusOK)
|
|
||||||
c.Response.Header.SetContentType(jsonHeader)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *Handler) handlePutFrostFSErr(r *fasthttp.RequestCtx, err error, log *zap.Logger) {
|
func (h *Handler) handlePutFrostFSErr(r *fasthttp.RequestCtx, err error, log *zap.Logger) {
|
||||||
|
|
1
internal/handler/upload_test.go
Normal file
1
internal/handler/upload_test.go
Normal file
|
@ -0,0 +1 @@
|
||||||
|
package handler
|
|
@ -25,7 +25,7 @@ const (
|
||||||
CantGracefullyShutDownService = "can't gracefully shut down service, force stop" // Error in ../../metrics/service.go
|
CantGracefullyShutDownService = "can't gracefully shut down service, force stop" // Error in ../../metrics/service.go
|
||||||
IgnorePartEmptyFormName = "ignore part, empty form name" // Debug in ../../uploader/upload.go
|
IgnorePartEmptyFormName = "ignore part, empty form name" // Debug in ../../uploader/upload.go
|
||||||
IgnorePartEmptyFilename = "ignore part, empty filename" // Debug in ../../uploader/upload.go
|
IgnorePartEmptyFilename = "ignore part, empty filename" // Debug in ../../uploader/upload.go
|
||||||
CloseTemporaryMultipartFormFile = "close temporary multipart/form file" // Debug in ../../uploader/upload.go
|
FailedToCloseTemporaryMultipartFormFile = "failed to close temporary multipart/form file" // Warn in ../../uploader/upload.go
|
||||||
CouldNotReceiveMultipartForm = "could not receive multipart/form" // Error in ../../uploader/upload.go
|
CouldNotReceiveMultipartForm = "could not receive multipart/form" // Error in ../../uploader/upload.go
|
||||||
CouldNotParseClientTime = "could not parse client time" // Warn in ../../uploader/upload.go
|
CouldNotParseClientTime = "could not parse client time" // Warn in ../../uploader/upload.go
|
||||||
CouldNotPrepareExpirationHeader = "could not prepare expiration header" // Error in ../../uploader/upload.go
|
CouldNotPrepareExpirationHeader = "could not prepare expiration header" // Error in ../../uploader/upload.go
|
||||||
|
@ -95,4 +95,11 @@ const (
|
||||||
FailedToLoadMultinetConfig = "failed to load multinet config"
|
FailedToLoadMultinetConfig = "failed to load multinet config"
|
||||||
MultinetConfigWontBeUpdated = "multinet config won't be updated"
|
MultinetConfigWontBeUpdated = "multinet config won't be updated"
|
||||||
ObjectNotFoundByFilePathTrySearchByFileName = "object not found by filePath attribute, try search by fileName"
|
ObjectNotFoundByFilePathTrySearchByFileName = "object not found by filePath attribute, try search by fileName"
|
||||||
|
FailedToFilterHeaders = "failed to filter headers"
|
||||||
|
FailedToReadFileFromTar = "failed to read file from tar"
|
||||||
|
FailedToGetAttributes = "failed to get attributes"
|
||||||
|
ObjectUploaded = "object uploaded"
|
||||||
|
FailedToCloseReader = "failed to close reader"
|
||||||
|
FailedToCreateGzipReader = "failed to create gzip reader"
|
||||||
|
GzipReaderSelected = "gzip reader selected"
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue