forked from TrueCloudLab/frostfs-http-gw
536 lines
15 KiB
Go
536 lines
15 KiB
Go
package downloader
|
|
|
|
import (
|
|
"archive/zip"
|
|
"bufio"
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"path"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/resolver"
|
|
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/response"
|
|
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tokens"
|
|
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tree"
|
|
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/utils"
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/bearer"
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client"
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container"
|
|
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
|
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pool"
|
|
"github.com/valyala/fasthttp"
|
|
"go.uber.org/atomic"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
type request struct {
|
|
*fasthttp.RequestCtx
|
|
log *zap.Logger
|
|
}
|
|
|
|
// isValidToken reports whether s consists solely of characters that are
// allowed in an HTTP token: visible US-ASCII characters that are not
// separators.
//
// Control characters, space, DEL (0x7F), non-ASCII runes and any of the
// separator characters ()<>@,;:\"/[]?={} make the string invalid. An empty
// string contains no offending character and is reported as valid.
func isValidToken(s string) bool {
	const separators = "()<>@,;:\\\"/[]?={}"

	for _, c := range s {
		// Only '!' (0x21) through '~' (0x7E) are acceptable. DEL (0x7F) is a
		// control character, so it is rejected together with non-ASCII runes.
		if c <= ' ' || c >= 127 {
			return false
		}
		if strings.ContainsRune(separators, c) {
			return false
		}
	}
	return true
}
|
|
|
|
// isValidValue reports whether s can be used as an HTTP header value without
// any escaping.
//
// Only space and visible US-ASCII characters other than the double quote are
// accepted. Control characters (including horizontal tab and DEL, 0x7F),
// non-ASCII runes and '"' make the string invalid. An empty string is valid.
func isValidValue(s string) bool {
	for _, c := range s {
		// HTTP specification allows for more technically, but we don't want to escape things.
		// DEL (0x7F) is a control character and is rejected like the ones below space.
		if c < ' ' || c >= 127 || c == '"' {
			return false
		}
	}
	return true
}
|
|
|
|
// readCloser glues an independent io.Reader and io.Closer into a single
// value that provides both Read and Close.
type readCloser struct {
	// Reader supplies the Read method.
	io.Reader

	// Closer supplies the Close method.
	io.Closer
}
|
|
|
|
// initializes io.Reader with the limited size and detects Content-Type from it.
|
|
// Returns r's error directly. Also returns the processed data.
|
|
func readContentType(maxSize uint64, rInit func(uint64) (io.Reader, error)) (string, []byte, error) {
|
|
if maxSize > sizeToDetectType {
|
|
maxSize = sizeToDetectType
|
|
}
|
|
|
|
buf := make([]byte, maxSize) // maybe sync-pool the slice?
|
|
|
|
r, err := rInit(maxSize)
|
|
if err != nil {
|
|
return "", nil, err
|
|
}
|
|
|
|
n, err := r.Read(buf)
|
|
if err != nil && err != io.EOF {
|
|
return "", nil, err
|
|
}
|
|
|
|
buf = buf[:n]
|
|
|
|
return http.DetectContentType(buf), buf, err // to not lose io.EOF
|
|
}
|
|
|
|
func receiveFile(ctx context.Context, req request, clnt *pool.Pool, objectAddress oid.Address) {
|
|
var (
|
|
err error
|
|
dis = "inline"
|
|
start = time.Now()
|
|
filename string
|
|
)
|
|
|
|
var prm pool.PrmObjectGet
|
|
prm.SetAddress(objectAddress)
|
|
if btoken := bearerToken(ctx); btoken != nil {
|
|
prm.UseBearer(*btoken)
|
|
}
|
|
|
|
rObj, err := clnt.GetObject(ctx, prm)
|
|
if err != nil {
|
|
req.handleFrostFSErr(err, start)
|
|
return
|
|
}
|
|
|
|
// we can't close reader in this function, so how to do it?
|
|
|
|
if req.Request.URI().QueryArgs().GetBool("download") {
|
|
dis = "attachment"
|
|
}
|
|
|
|
payloadSize := rObj.Header.PayloadSize()
|
|
|
|
req.Response.Header.Set(fasthttp.HeaderContentLength, strconv.FormatUint(payloadSize, 10))
|
|
var contentType string
|
|
for _, attr := range rObj.Header.Attributes() {
|
|
key := attr.Key()
|
|
val := attr.Value()
|
|
if !isValidToken(key) || !isValidValue(val) {
|
|
continue
|
|
}
|
|
|
|
key = utils.BackwardTransformIfSystem(key)
|
|
|
|
req.Response.Header.Set(utils.UserAttributeHeaderPrefix+key, val)
|
|
switch key {
|
|
case object.AttributeFileName:
|
|
filename = val
|
|
case object.AttributeTimestamp:
|
|
value, err := strconv.ParseInt(val, 10, 64)
|
|
if err != nil {
|
|
req.log.Info("couldn't parse creation date",
|
|
zap.String("key", key),
|
|
zap.String("val", val),
|
|
zap.Error(err))
|
|
continue
|
|
}
|
|
req.Response.Header.Set(fasthttp.HeaderLastModified,
|
|
time.Unix(value, 0).UTC().Format(http.TimeFormat))
|
|
case object.AttributeContentType:
|
|
contentType = val
|
|
}
|
|
}
|
|
|
|
idsToResponse(&req.Response, &rObj.Header)
|
|
|
|
if len(contentType) == 0 {
|
|
// determine the Content-Type from the payload head
|
|
var payloadHead []byte
|
|
|
|
contentType, payloadHead, err = readContentType(payloadSize, func(uint64) (io.Reader, error) {
|
|
return rObj.Payload, nil
|
|
})
|
|
if err != nil && err != io.EOF {
|
|
req.log.Error("could not detect Content-Type from payload", zap.Error(err))
|
|
response.Error(req.RequestCtx, "could not detect Content-Type from payload: "+err.Error(), fasthttp.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
// reset payload reader since a part of the data has been read
|
|
var headReader io.Reader = bytes.NewReader(payloadHead)
|
|
|
|
if err != io.EOF { // otherwise, we've already read full payload
|
|
headReader = io.MultiReader(headReader, rObj.Payload)
|
|
}
|
|
|
|
// note: we could do with io.Reader, but SetBodyStream below closes body stream
|
|
// if it implements io.Closer and that's useful for us.
|
|
rObj.Payload = readCloser{headReader, rObj.Payload}
|
|
}
|
|
req.SetContentType(contentType)
|
|
|
|
req.Response.Header.Set(fasthttp.HeaderContentDisposition, dis+"; filename="+path.Base(filename))
|
|
|
|
req.Response.SetBodyStream(rObj.Payload, int(payloadSize))
|
|
}
|
|
|
|
func bearerToken(ctx context.Context) *bearer.Token {
|
|
if tkn, err := tokens.LoadBearerToken(ctx); err == nil {
|
|
return tkn
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (r *request) handleFrostFSErr(err error, start time.Time) {
|
|
logFields := []zap.Field{
|
|
zap.Stringer("elapsed", time.Since(start)),
|
|
zap.Error(err),
|
|
}
|
|
statusCode, msg, additionalFields := response.FormErrorResponse("could not receive object", err)
|
|
logFields = append(logFields, additionalFields...)
|
|
|
|
r.log.Error("could not receive object", logFields...)
|
|
response.Error(r.RequestCtx, msg, statusCode)
|
|
}
|
|
|
|
// Downloader is a download request handler.
|
|
type Downloader struct {
|
|
log *zap.Logger
|
|
pool *pool.Pool
|
|
containerResolver *resolver.ContainerResolver
|
|
settings *Settings
|
|
tree *tree.Tree
|
|
}
|
|
|
|
// Settings stores reloading parameters, so it has to provide atomic getters and setters.
|
|
type Settings struct {
|
|
zipCompression atomic.Bool
|
|
}
|
|
|
|
func (s *Settings) ZipCompression() bool {
|
|
return s.zipCompression.Load()
|
|
}
|
|
|
|
func (s *Settings) SetZipCompression(val bool) {
|
|
s.zipCompression.Store(val)
|
|
}
|
|
|
|
// New creates an instance of Downloader using specified options.
|
|
func New(params *utils.AppParams, settings *Settings, tree *tree.Tree) *Downloader {
|
|
return &Downloader{
|
|
log: params.Logger,
|
|
pool: params.Pool,
|
|
settings: settings,
|
|
containerResolver: params.Resolver,
|
|
tree: tree,
|
|
}
|
|
}
|
|
|
|
func (d *Downloader) newRequest(ctx *fasthttp.RequestCtx, log *zap.Logger) *request {
|
|
return &request{
|
|
RequestCtx: ctx,
|
|
log: log,
|
|
}
|
|
}
|
|
|
|
// DownloadByAddressOrBucketName handles download requests using simple cid/oid or bucketname/key format.
|
|
func (d *Downloader) DownloadByAddressOrBucketName(c *fasthttp.RequestCtx) {
|
|
test, _ := c.UserValue("oid").(string)
|
|
var id oid.ID
|
|
err := id.DecodeString(test)
|
|
if err != nil {
|
|
d.byBucketname(c, receiveFile)
|
|
} else {
|
|
d.byAddress(c, receiveFile)
|
|
}
|
|
}
|
|
|
|
// byAddress is a wrapper for function (e.g. request.headObject, request.receiveFile) that
|
|
// prepares request and object address to it.
|
|
func (d *Downloader) byAddress(c *fasthttp.RequestCtx, f func(context.Context, request, *pool.Pool, oid.Address)) {
|
|
var (
|
|
idCnr, _ = c.UserValue("cid").(string)
|
|
idObj, _ = c.UserValue("oid").(string)
|
|
log = d.log.With(zap.String("cid", idCnr), zap.String("oid", idObj))
|
|
)
|
|
|
|
ctx := utils.GetContextFromRequest(c)
|
|
|
|
cnrID, err := utils.GetContainerID(ctx, idCnr, d.containerResolver)
|
|
if err != nil {
|
|
log.Error("wrong container id", zap.Error(err))
|
|
response.Error(c, "wrong container id", fasthttp.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
objID := new(oid.ID)
|
|
if err = objID.DecodeString(idObj); err != nil {
|
|
log.Error("wrong object id", zap.Error(err))
|
|
response.Error(c, "wrong object id", fasthttp.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
var addr oid.Address
|
|
addr.SetContainer(*cnrID)
|
|
addr.SetObject(*objID)
|
|
|
|
f(ctx, *d.newRequest(c, log), d.pool, addr)
|
|
}
|
|
|
|
// byBucketname is a wrapper for function (e.g. request.headObject, request.receiveFile) that
|
|
// prepares request and object address to it.
|
|
func (d *Downloader) byBucketname(req *fasthttp.RequestCtx, f func(context.Context, request, *pool.Pool, oid.Address)) {
|
|
var (
|
|
bucketname = req.UserValue("cid").(string)
|
|
key = req.UserValue("oid").(string)
|
|
log = d.log.With(zap.String("bucketname", bucketname), zap.String("key", key))
|
|
)
|
|
|
|
ctx := utils.GetContextFromRequest(req)
|
|
|
|
cnrID, err := utils.GetContainerID(ctx, bucketname, d.containerResolver)
|
|
if err != nil {
|
|
log.Error("wrong container id", zap.Error(err))
|
|
response.Error(req, "wrong container id", fasthttp.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
foundOid, err := d.tree.GetLatestVersion(ctx, cnrID, key)
|
|
if err != nil {
|
|
log.Error("object wasn't found", zap.Error(err))
|
|
response.Error(req, "object wasn't found", fasthttp.StatusNotFound)
|
|
return
|
|
}
|
|
if foundOid.DeleteMarker {
|
|
log.Error("object was deleted")
|
|
response.Error(req, "object deleted", fasthttp.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
var addr oid.Address
|
|
addr.SetContainer(*cnrID)
|
|
addr.SetObject(foundOid.OID)
|
|
|
|
f(ctx, *d.newRequest(req, log), d.pool, addr)
|
|
}
|
|
|
|
// DownloadByAttribute handles attribute-based download requests.
|
|
func (d *Downloader) DownloadByAttribute(c *fasthttp.RequestCtx) {
|
|
d.byAttribute(c, receiveFile)
|
|
}
|
|
|
|
// byAttribute is a wrapper similar to byAddress.
|
|
func (d *Downloader) byAttribute(c *fasthttp.RequestCtx, f func(context.Context, request, *pool.Pool, oid.Address)) {
|
|
var (
|
|
scid, _ = c.UserValue("cid").(string)
|
|
key, _ = url.QueryUnescape(c.UserValue("attr_key").(string))
|
|
val, _ = url.QueryUnescape(c.UserValue("attr_val").(string))
|
|
log = d.log.With(zap.String("cid", scid), zap.String("attr_key", key), zap.String("attr_val", val))
|
|
)
|
|
|
|
ctx := utils.GetContextFromRequest(c)
|
|
|
|
containerID, err := utils.GetContainerID(ctx, scid, d.containerResolver)
|
|
if err != nil {
|
|
log.Error("wrong container id", zap.Error(err))
|
|
response.Error(c, "wrong container id", fasthttp.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
res, err := d.search(ctx, containerID, key, val, object.MatchStringEqual)
|
|
if err != nil {
|
|
log.Error("could not search for objects", zap.Error(err))
|
|
response.Error(c, "could not search for objects: "+err.Error(), fasthttp.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
defer res.Close()
|
|
|
|
buf := make([]oid.ID, 1)
|
|
|
|
n, err := res.Read(buf)
|
|
if n == 0 {
|
|
if errors.Is(err, io.EOF) {
|
|
log.Error("object not found", zap.Error(err))
|
|
response.Error(c, "object not found", fasthttp.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
log.Error("read object list failed", zap.Error(err))
|
|
response.Error(c, "read object list failed: "+err.Error(), fasthttp.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
var addrObj oid.Address
|
|
addrObj.SetContainer(*containerID)
|
|
addrObj.SetObject(buf[0])
|
|
|
|
f(ctx, *d.newRequest(c, log), d.pool, addrObj)
|
|
}
|
|
|
|
func (d *Downloader) search(ctx context.Context, cid *cid.ID, key, val string, op object.SearchMatchType) (pool.ResObjectSearch, error) {
|
|
filters := object.NewSearchFilters()
|
|
filters.AddRootFilter()
|
|
filters.AddFilter(key, val, op)
|
|
|
|
var prm pool.PrmObjectSearch
|
|
prm.SetContainerID(*cid)
|
|
prm.SetFilters(filters)
|
|
if btoken := bearerToken(ctx); btoken != nil {
|
|
prm.UseBearer(*btoken)
|
|
}
|
|
|
|
return d.pool.SearchObjects(ctx, prm)
|
|
}
|
|
|
|
func (d *Downloader) getContainer(ctx context.Context, cnrID cid.ID) (container.Container, error) {
|
|
var prm pool.PrmContainerGet
|
|
prm.SetContainerID(cnrID)
|
|
|
|
return d.pool.GetContainer(ctx, prm)
|
|
}
|
|
|
|
func (d *Downloader) addObjectToZip(zw *zip.Writer, obj *object.Object) (io.Writer, error) {
|
|
method := zip.Store
|
|
if d.settings.ZipCompression() {
|
|
method = zip.Deflate
|
|
}
|
|
|
|
filePath := getZipFilePath(obj)
|
|
if len(filePath) == 0 || filePath[len(filePath)-1] == '/' {
|
|
return nil, fmt.Errorf("invalid filepath '%s'", filePath)
|
|
}
|
|
|
|
return zw.CreateHeader(&zip.FileHeader{
|
|
Name: filePath,
|
|
Method: method,
|
|
Modified: time.Now(),
|
|
})
|
|
}
|
|
|
|
// DownloadZipped handles zip by prefix requests.
|
|
func (d *Downloader) DownloadZipped(c *fasthttp.RequestCtx) {
|
|
scid, _ := c.UserValue("cid").(string)
|
|
prefix, _ := url.QueryUnescape(c.UserValue("prefix").(string))
|
|
log := d.log.With(zap.String("cid", scid), zap.String("prefix", prefix))
|
|
|
|
ctx := utils.GetContextFromRequest(c)
|
|
|
|
containerID, err := utils.GetContainerID(ctx, scid, d.containerResolver)
|
|
if err != nil {
|
|
log.Error("wrong container id", zap.Error(err))
|
|
response.Error(c, "wrong container id", fasthttp.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
// check if container exists here to be able to return 404 error,
|
|
// otherwise we get this error only in object iteration step
|
|
// and client get 200 OK.
|
|
if _, err = d.getContainer(ctx, *containerID); err != nil {
|
|
log.Error("could not check container existence", zap.Error(err))
|
|
if client.IsErrContainerNotFound(err) {
|
|
response.Error(c, "Not Found", fasthttp.StatusNotFound)
|
|
return
|
|
}
|
|
response.Error(c, "could not check container existence: "+err.Error(), fasthttp.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
resSearch, err := d.search(ctx, containerID, object.AttributeFilePath, prefix, object.MatchCommonPrefix)
|
|
if err != nil {
|
|
log.Error("could not search for objects", zap.Error(err))
|
|
response.Error(c, "could not search for objects: "+err.Error(), fasthttp.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
c.Response.Header.Set(fasthttp.HeaderContentType, "application/zip")
|
|
c.Response.Header.Set(fasthttp.HeaderContentDisposition, "attachment; filename=\"archive.zip\"")
|
|
c.Response.SetStatusCode(http.StatusOK)
|
|
|
|
c.SetBodyStreamWriter(func(w *bufio.Writer) {
|
|
defer resSearch.Close()
|
|
|
|
zipWriter := zip.NewWriter(w)
|
|
|
|
var bufZip []byte
|
|
var addr oid.Address
|
|
|
|
empty := true
|
|
called := false
|
|
btoken := bearerToken(ctx)
|
|
addr.SetContainer(*containerID)
|
|
|
|
errIter := resSearch.Iterate(func(id oid.ID) bool {
|
|
called = true
|
|
|
|
if empty {
|
|
bufZip = make([]byte, 3<<20) // the same as for upload
|
|
}
|
|
empty = false
|
|
|
|
addr.SetObject(id)
|
|
if err = d.zipObject(ctx, zipWriter, addr, btoken, bufZip); err != nil {
|
|
log.Error("failed to add object to archive", zap.String("oid", id.EncodeToString()), zap.Error(err))
|
|
}
|
|
|
|
return false
|
|
})
|
|
if errIter != nil {
|
|
log.Error("iterating over selected objects failed", zap.Error(errIter))
|
|
} else if !called {
|
|
log.Error("objects not found")
|
|
}
|
|
|
|
if err = zipWriter.Close(); err != nil {
|
|
log.Error("close zip writer", zap.Error(err))
|
|
}
|
|
})
|
|
}
|
|
|
|
func (d *Downloader) zipObject(ctx context.Context, zipWriter *zip.Writer, addr oid.Address, btoken *bearer.Token, bufZip []byte) error {
|
|
var prm pool.PrmObjectGet
|
|
prm.SetAddress(addr)
|
|
if btoken != nil {
|
|
prm.UseBearer(*btoken)
|
|
}
|
|
|
|
resGet, err := d.pool.GetObject(ctx, prm)
|
|
if err != nil {
|
|
return fmt.Errorf("get FrostFS object: %v", err)
|
|
}
|
|
|
|
objWriter, err := d.addObjectToZip(zipWriter, &resGet.Header)
|
|
if err != nil {
|
|
return fmt.Errorf("zip create header: %v", err)
|
|
}
|
|
|
|
if _, err = io.CopyBuffer(objWriter, resGet.Payload, bufZip); err != nil {
|
|
return fmt.Errorf("copy object payload to zip file: %v", err)
|
|
}
|
|
|
|
if err = resGet.Payload.Close(); err != nil {
|
|
return fmt.Errorf("object body close error: %w", err)
|
|
}
|
|
|
|
if err = zipWriter.Flush(); err != nil {
|
|
return fmt.Errorf("flush zip writer: %v", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func getZipFilePath(obj *object.Object) string {
|
|
for _, attr := range obj.Attributes() {
|
|
if attr.Key() == object.AttributeFilePath {
|
|
return attr.Value()
|
|
}
|
|
}
|
|
|
|
return ""
|
|
}
|