[#170] Support .tar/.tgz unpacking during upload
During upload if X-Explode-Archive is set, gate tries to read archive and create an object for each file. Each object acquires a FilePath attribute which is calculated relative to the archive root. Archive could have compression via Gzip if "Content-Encoding: gzip" header is specified Signed-off-by: Nikita Zinkevich <n.zinkevich@yadro.com>
This commit is contained in:
parent
7901d00924
commit
1e7309684b
5 changed files with 185 additions and 92 deletions
|
@ -1,13 +1,19 @@
|
|||
package handler
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tokens"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/utils"
|
||||
|
@ -19,8 +25,9 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
jsonHeader = "application/json; charset=UTF-8"
|
||||
drainBufSize = 4096
|
||||
jsonHeader = "application/json; charset=UTF-8"
|
||||
drainBufSize = 4096
|
||||
explodeArchiveHeader = "X-Explode-Archive"
|
||||
)
|
||||
|
||||
type putResponse struct {
|
||||
|
@ -43,11 +50,7 @@ func (pr *putResponse) encode(w io.Writer) error {
|
|||
|
||||
// Upload handles multipart upload request.
|
||||
func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
||||
var (
|
||||
file MultipartFile
|
||||
idObj oid.ID
|
||||
addr oid.Address
|
||||
)
|
||||
var file MultipartFile
|
||||
|
||||
scid, _ := c.UserValue("cid").(string)
|
||||
bodyStream := c.RequestBodyStream()
|
||||
|
@ -63,20 +66,6 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
|||
return
|
||||
}
|
||||
|
||||
defer func() {
|
||||
// If the temporary reader can be closed - let's close it.
|
||||
if file == nil {
|
||||
return
|
||||
}
|
||||
err := file.Close()
|
||||
log.Debug(
|
||||
logs.CloseTemporaryMultipartFormFile,
|
||||
zap.Stringer("address", addr),
|
||||
zap.String("filename", file.FileName()),
|
||||
zap.Error(err),
|
||||
)
|
||||
}()
|
||||
|
||||
boundary := string(c.Request.Header.MultipartFormBoundary())
|
||||
if file, err = fetchMultipartFile(log, bodyStream, boundary); err != nil {
|
||||
log.Error(logs.CouldNotReceiveMultipartForm, zap.Error(err))
|
||||
|
@ -86,53 +75,69 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
|||
|
||||
filtered, err := filterHeaders(log, &c.Request.Header)
|
||||
if err != nil {
|
||||
log.Error(logs.CouldNotProcessHeaders, zap.Error(err))
|
||||
log.Error(logs.FailedToFilterHeaders, zap.Error(err))
|
||||
ResponseError(c, err.Error(), fasthttp.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
if rawHeader := c.Request.Header.Peek(fasthttp.HeaderDate); rawHeader != nil {
|
||||
if parsed, err := time.Parse(http.TimeFormat, string(rawHeader)); err != nil {
|
||||
log.Warn(logs.CouldNotParseClientTime, zap.String("Date header", string(rawHeader)), zap.Error(err))
|
||||
} else {
|
||||
now = parsed
|
||||
}
|
||||
if c.Request.Header.Peek(explodeArchiveHeader) != nil {
|
||||
h.explodeArchive(request{c, log}, bktInfo, file, filtered)
|
||||
} else {
|
||||
h.uploadSingleObject(request{c, log}, bktInfo, file, filtered)
|
||||
}
|
||||
|
||||
if err = utils.PrepareExpirationHeader(c, h.frostfs, filtered, now); err != nil {
|
||||
log.Error(logs.CouldNotPrepareExpirationHeader, zap.Error(err))
|
||||
ResponseError(c, "could not prepare expiration header: "+err.Error(), fasthttp.StatusBadRequest)
|
||||
// Multipart is multipart and thus can contain more than one part which
|
||||
// we ignore at the moment. Also, when dealing with chunked encoding
|
||||
// the last zero-length chunk might be left unread (because multipart
|
||||
// reader only cares about its boundary and doesn't look further) and
|
||||
// it will be (erroneously) interpreted as the start of the next
|
||||
// pipelined header. Thus, we need to drain the body buffer.
|
||||
for {
|
||||
_, err = bodyStream.Read(drainBuf)
|
||||
if err == io.EOF || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *Handler) uploadSingleObject(req request, bkt *data.BucketInfo, file MultipartFile, filtered map[string]string) {
|
||||
c, log := req.RequestCtx, req.log
|
||||
setIfNotExist(filtered, object.AttributeFileName, file.FileName())
|
||||
|
||||
attributes, err := h.extractAttributes(c, log, filtered)
|
||||
if err != nil {
|
||||
log.Error(logs.FailedToGetAttributes, zap.Error(err))
|
||||
ResponseError(c, "could not extract attributes: "+err.Error(), fasthttp.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
attributes := make([]object.Attribute, 0, len(filtered))
|
||||
// prepares attributes from filtered headers
|
||||
for key, val := range filtered {
|
||||
attribute := object.NewAttribute()
|
||||
attribute.SetKey(key)
|
||||
attribute.SetValue(val)
|
||||
attributes = append(attributes, *attribute)
|
||||
idObj, err := h.uploadObject(c, bkt, attributes, file)
|
||||
if err != nil {
|
||||
h.handlePutFrostFSErr(c, err, log)
|
||||
return
|
||||
}
|
||||
// sets FileName attribute if it wasn't set from header
|
||||
if _, ok := filtered[object.AttributeFileName]; !ok {
|
||||
filename := object.NewAttribute()
|
||||
filename.SetKey(object.AttributeFileName)
|
||||
filename.SetValue(file.FileName())
|
||||
attributes = append(attributes, *filename)
|
||||
}
|
||||
// sets Timestamp attribute if it wasn't set from header and enabled by settings
|
||||
if _, ok := filtered[object.AttributeTimestamp]; !ok && h.config.DefaultTimestamp() {
|
||||
timestamp := object.NewAttribute()
|
||||
timestamp.SetKey(object.AttributeTimestamp)
|
||||
timestamp.SetValue(strconv.FormatInt(time.Now().Unix(), 10))
|
||||
attributes = append(attributes, *timestamp)
|
||||
log.Debug(logs.ObjectUploaded,
|
||||
zap.String("oid", idObj.EncodeToString()),
|
||||
zap.String("FileName", file.FileName()),
|
||||
)
|
||||
|
||||
addr := newAddress(bkt.CID, idObj)
|
||||
c.Response.Header.SetContentType(jsonHeader)
|
||||
// Try to return the response, otherwise, if something went wrong, throw an error.
|
||||
if err = newPutResponse(addr).encode(c); err != nil {
|
||||
log.Error(logs.CouldNotEncodeResponse, zap.Error(err))
|
||||
ResponseError(c, "could not encode response", fasthttp.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (h *Handler) uploadObject(c *fasthttp.RequestCtx, bkt *data.BucketInfo, attrs []object.Attribute, file io.Reader) (oid.ID, error) {
|
||||
ctx := utils.GetContextFromRequest(c)
|
||||
|
||||
obj := object.New()
|
||||
obj.SetContainerID(bktInfo.CID)
|
||||
obj.SetContainerID(bkt.CID)
|
||||
obj.SetOwnerID(*h.ownerID)
|
||||
obj.SetAttributes(attributes...)
|
||||
obj.SetAttributes(attrs...)
|
||||
|
||||
prm := PrmObjectCreate{
|
||||
PrmAuth: PrmAuth{
|
||||
|
@ -141,40 +146,120 @@ func (h *Handler) Upload(c *fasthttp.RequestCtx) {
|
|||
Object: obj,
|
||||
Payload: file,
|
||||
ClientCut: h.config.ClientCut(),
|
||||
WithoutHomomorphicHash: bktInfo.HomomorphicHashDisabled,
|
||||
WithoutHomomorphicHash: bkt.HomomorphicHashDisabled,
|
||||
BufferMaxSize: h.config.BufferMaxSizeForPut(),
|
||||
}
|
||||
|
||||
if idObj, err = h.frostfs.CreateObject(ctx, prm); err != nil {
|
||||
h.handlePutFrostFSErr(c, err, log)
|
||||
return
|
||||
idObj, err := h.frostfs.CreateObject(ctx, prm)
|
||||
if err != nil {
|
||||
return oid.ID{}, err
|
||||
}
|
||||
|
||||
addr.SetObject(idObj)
|
||||
addr.SetContainer(bktInfo.CID)
|
||||
return idObj, nil
|
||||
}
|
||||
|
||||
// Try to return the response, otherwise, if something went wrong, throw an error.
|
||||
if err = newPutResponse(addr).encode(c); err != nil {
|
||||
log.Error(logs.CouldNotEncodeResponse, zap.Error(err))
|
||||
ResponseError(c, "could not encode response", fasthttp.StatusBadRequest)
|
||||
|
||||
return
|
||||
}
|
||||
// Multipart is multipart and thus can contain more than one part which
|
||||
// we ignore at the moment. Also, when dealing with chunked encoding
|
||||
// the last zero-length chunk might be left unread (because multipart
|
||||
// reader only cares about its boundary and doesn't look further) and
|
||||
// it will be (erroneously) interpreted as the start of the next
|
||||
// pipelined header. Thus we need to drain the body buffer.
|
||||
for {
|
||||
_, err = bodyStream.Read(drainBuf)
|
||||
if err == io.EOF || err == io.ErrUnexpectedEOF {
|
||||
break
|
||||
func (h *Handler) extractAttributes(c *fasthttp.RequestCtx, log *zap.Logger, filtered map[string]string) ([]object.Attribute, error) {
|
||||
now := time.Now()
|
||||
if rawHeader := c.Request.Header.Peek(fasthttp.HeaderDate); rawHeader != nil {
|
||||
if parsed, err := time.Parse(http.TimeFormat, string(rawHeader)); err != nil {
|
||||
log.Warn(logs.CouldNotParseClientTime, zap.String("Date header", string(rawHeader)), zap.Error(err))
|
||||
} else {
|
||||
now = parsed
|
||||
}
|
||||
}
|
||||
// Report status code and content type.
|
||||
c.Response.SetStatusCode(fasthttp.StatusOK)
|
||||
c.Response.Header.SetContentType(jsonHeader)
|
||||
if err := utils.PrepareExpirationHeader(c, h.frostfs, filtered, now); err != nil {
|
||||
log.Error(logs.CouldNotPrepareExpirationHeader, zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
attributes := make([]object.Attribute, 0, len(filtered))
|
||||
// prepares attributes from filtered headers
|
||||
for key, val := range filtered {
|
||||
attribute := newAttribute(key, val)
|
||||
attributes = append(attributes, attribute)
|
||||
}
|
||||
// sets Timestamp attribute if it wasn't set from header and enabled by settings
|
||||
if _, ok := filtered[object.AttributeTimestamp]; !ok && h.config.DefaultTimestamp() {
|
||||
timestamp := newAttribute(object.AttributeTimestamp, strconv.FormatInt(time.Now().Unix(), 10))
|
||||
attributes = append(attributes, timestamp)
|
||||
}
|
||||
|
||||
return attributes, nil
|
||||
}
|
||||
|
||||
func newAttribute(key string, val string) object.Attribute {
|
||||
attr := object.NewAttribute()
|
||||
attr.SetKey(key)
|
||||
attr.SetValue(val)
|
||||
return *attr
|
||||
}
|
||||
|
||||
// explodeArchive read files from archive and creates objects for each of them.
|
||||
// Sets FilePath attribute with name from tar.Header.
|
||||
func (h *Handler) explodeArchive(req request, bkt *data.BucketInfo, file io.ReadCloser, filtered map[string]string) {
|
||||
c, log := req.RequestCtx, req.log
|
||||
|
||||
// remove user attributes which vary for each file in archive
|
||||
// to guarantee that they won't appear twice
|
||||
delete(filtered, object.AttributeFileName)
|
||||
delete(filtered, object.AttributeFilePath)
|
||||
|
||||
commonAttributes, err := h.extractAttributes(c, log, filtered)
|
||||
if err != nil {
|
||||
log.Error(logs.FailedToGetAttributes, zap.Error(err))
|
||||
ResponseError(c, "could not extract attributes: "+err.Error(), fasthttp.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
attributes := commonAttributes
|
||||
|
||||
reader := file
|
||||
if bytes.EqualFold(c.Request.Header.Peek(fasthttp.HeaderContentEncoding), []byte("gzip")) {
|
||||
log.Debug(logs.GzipReaderSelected)
|
||||
gzipReader, err := gzip.NewReader(file)
|
||||
if err != nil {
|
||||
log.Error(logs.FailedToCreateGzipReader, zap.Error(err))
|
||||
ResponseError(c, "could read gzip file: "+err.Error(), fasthttp.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if err := gzipReader.Close(); err != nil {
|
||||
log.Warn(logs.FailedToCloseReader, zap.Error(err))
|
||||
}
|
||||
}()
|
||||
reader = gzipReader
|
||||
}
|
||||
|
||||
tarReader := tar.NewReader(reader)
|
||||
for {
|
||||
obj, err := tarReader.Next()
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
} else if err != nil {
|
||||
log.Error(logs.FailedToReadFileFromTar, zap.Error(err))
|
||||
ResponseError(c, "could not get next entry: "+err.Error(), fasthttp.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if isDir(obj.Name) {
|
||||
continue
|
||||
}
|
||||
|
||||
// set varying attributes
|
||||
attributes = attributes[:len(commonAttributes)]
|
||||
fileName := filepath.Base(obj.Name)
|
||||
attributes = append(attributes, newAttribute(object.AttributeFilePath, obj.Name))
|
||||
attributes = append(attributes, newAttribute(object.AttributeFileName, fileName))
|
||||
|
||||
idObj, err := h.uploadObject(c, bkt, attributes, tarReader)
|
||||
if err != nil {
|
||||
h.handlePutFrostFSErr(c, err, log)
|
||||
return
|
||||
}
|
||||
|
||||
log.Debug(logs.ObjectUploaded,
|
||||
zap.String("oid", idObj.EncodeToString()),
|
||||
zap.String("FileName", fileName),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *Handler) handlePutFrostFSErr(r *fasthttp.RequestCtx, err error, log *zap.Logger) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue