package downloader import ( "archive/zip" "bufio" "bytes" "context" "errors" "fmt" "io" "net/http" "net/url" "path" "strconv" "strings" "time" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/resolver" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/response" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/tokens" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/tree" "git.frostfs.info/TrueCloudLab/frostfs-http-gw/utils" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/bearer" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container" cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pool" "github.com/valyala/fasthttp" "go.uber.org/atomic" "go.uber.org/zap" ) type request struct { *fasthttp.RequestCtx log *zap.Logger } func isValidToken(s string) bool { for _, c := range s { if c <= ' ' || c > 127 { return false } if strings.ContainsRune("()<>@,;:\\\"/[]?={}", c) { return false } } return true } func isValidValue(s string) bool { for _, c := range s { // HTTP specification allows for more technically, but we don't want to escape things. if c < ' ' || c > 127 || c == '"' { return false } } return true } type readCloser struct { io.Reader io.Closer } // initializes io.Reader with the limited size and detects Content-Type from it. // Returns r's error directly. Also returns the processed data. func readContentType(maxSize uint64, rInit func(uint64) (io.Reader, error)) (string, []byte, error) { if maxSize > sizeToDetectType { maxSize = sizeToDetectType } buf := make([]byte, maxSize) // maybe sync-pool the slice? r, err := rInit(maxSize) if err != nil { return "", nil, err } n, err := r.Read(buf) if err != nil && err != io.EOF { return "", nil, err } buf = buf[:n] return http.DetectContentType(buf), buf, err // to not lose io.EOF } func receiveFile(ctx context.Context, req request, clnt *pool.Pool, objectAddress oid.Address) { var ( err error dis = "inline" start = time.Now() filename string ) var prm pool.PrmObjectGet prm.SetAddress(objectAddress) if btoken := bearerToken(ctx); btoken != nil { prm.UseBearer(*btoken) } rObj, err := clnt.GetObject(ctx, prm) if err != nil { req.handleFrostFSErr(err, start) return } // we can't close reader in this function, so how to do it? if req.Request.URI().QueryArgs().GetBool("download") { dis = "attachment" } payloadSize := rObj.Header.PayloadSize() req.Response.Header.Set(fasthttp.HeaderContentLength, strconv.FormatUint(payloadSize, 10)) var contentType string for _, attr := range rObj.Header.Attributes() { key := attr.Key() val := attr.Value() if !isValidToken(key) || !isValidValue(val) { continue } key = utils.BackwardTransformIfSystem(key) req.Response.Header.Set(utils.UserAttributeHeaderPrefix+key, val) switch key { case object.AttributeFileName: filename = val case object.AttributeTimestamp: value, err := strconv.ParseInt(val, 10, 64) if err != nil { req.log.Info("couldn't parse creation date", zap.String("key", key), zap.String("val", val), zap.Error(err)) continue } req.Response.Header.Set(fasthttp.HeaderLastModified, time.Unix(value, 0).UTC().Format(http.TimeFormat)) case object.AttributeContentType: contentType = val } } idsToResponse(&req.Response, &rObj.Header) if len(contentType) == 0 { // determine the Content-Type from the payload head var payloadHead []byte contentType, payloadHead, err = readContentType(payloadSize, func(uint64) (io.Reader, error) { return rObj.Payload, nil }) if err != nil && err != io.EOF { req.log.Error("could not detect Content-Type from payload", zap.Error(err)) response.Error(req.RequestCtx, "could not detect Content-Type from payload: "+err.Error(), fasthttp.StatusBadRequest) return } // reset payload reader since a part of the data has been read var headReader io.Reader = bytes.NewReader(payloadHead) if err != io.EOF { // otherwise, we've already read full payload headReader = io.MultiReader(headReader, rObj.Payload) } // note: we could do with io.Reader, but SetBodyStream below closes body stream // if it implements io.Closer and that's useful for us. rObj.Payload = readCloser{headReader, rObj.Payload} } req.SetContentType(contentType) req.Response.Header.Set(fasthttp.HeaderContentDisposition, dis+"; filename="+path.Base(filename)) req.Response.SetBodyStream(rObj.Payload, int(payloadSize)) } func bearerToken(ctx context.Context) *bearer.Token { if tkn, err := tokens.LoadBearerToken(ctx); err == nil { return tkn } return nil } func (r *request) handleFrostFSErr(err error, start time.Time) { logFields := []zap.Field{ zap.Stringer("elapsed", time.Since(start)), zap.Error(err), } statusCode, msg, additionalFields := response.FormErrorResponse("could not receive object", err) logFields = append(logFields, additionalFields...) r.log.Error("could not receive object", logFields...) response.Error(r.RequestCtx, msg, statusCode) } // Downloader is a download request handler. type Downloader struct { log *zap.Logger pool *pool.Pool containerResolver *resolver.ContainerResolver settings *Settings tree *tree.Tree } // Settings stores reloading parameters, so it has to provide atomic getters and setters. type Settings struct { zipCompression atomic.Bool } func (s *Settings) ZipCompression() bool { return s.zipCompression.Load() } func (s *Settings) SetZipCompression(val bool) { s.zipCompression.Store(val) } // New creates an instance of Downloader using specified options. func New(params *utils.AppParams, settings *Settings, tree *tree.Tree) *Downloader { return &Downloader{ log: params.Logger, pool: params.Pool, settings: settings, containerResolver: params.Resolver, tree: tree, } } func (d *Downloader) newRequest(ctx *fasthttp.RequestCtx, log *zap.Logger) *request { return &request{ RequestCtx: ctx, log: log, } } // DownloadByAddressOrBucketName handles download requests using simple cid/oid or bucketname/key format. func (d *Downloader) DownloadByAddressOrBucketName(c *fasthttp.RequestCtx) { test, _ := c.UserValue("oid").(string) var id oid.ID err := id.DecodeString(test) if err != nil { d.byBucketname(c, receiveFile) } else { d.byAddress(c, receiveFile) } } // byAddress is a wrapper for function (e.g. request.headObject, request.receiveFile) that // prepares request and object address to it. func (d *Downloader) byAddress(c *fasthttp.RequestCtx, f func(context.Context, request, *pool.Pool, oid.Address)) { var ( idCnr, _ = c.UserValue("cid").(string) idObj, _ = c.UserValue("oid").(string) log = d.log.With(zap.String("cid", idCnr), zap.String("oid", idObj)) ) ctx := utils.GetContextFromRequest(c) cnrID, err := utils.GetContainerID(ctx, idCnr, d.containerResolver) if err != nil { log.Error("wrong container id", zap.Error(err)) response.Error(c, "wrong container id", fasthttp.StatusBadRequest) return } objID := new(oid.ID) if err = objID.DecodeString(idObj); err != nil { log.Error("wrong object id", zap.Error(err)) response.Error(c, "wrong object id", fasthttp.StatusBadRequest) return } var addr oid.Address addr.SetContainer(*cnrID) addr.SetObject(*objID) f(ctx, *d.newRequest(c, log), d.pool, addr) } // byBucketname is a wrapper for function (e.g. request.headObject, request.receiveFile) that // prepares request and object address to it. func (d *Downloader) byBucketname(req *fasthttp.RequestCtx, f func(context.Context, request, *pool.Pool, oid.Address)) { var ( bucketname = req.UserValue("cid").(string) key = req.UserValue("oid").(string) log = d.log.With(zap.String("bucketname", bucketname), zap.String("key", key)) ) ctx := utils.GetContextFromRequest(req) cnrID, err := utils.GetContainerID(ctx, bucketname, d.containerResolver) if err != nil { log.Error("wrong container id", zap.Error(err)) response.Error(req, "wrong container id", fasthttp.StatusBadRequest) return } foundOid, err := d.tree.GetLatestVersion(ctx, cnrID, key) if err != nil { log.Error("object wasn't found", zap.Error(err)) response.Error(req, "object wasn't found", fasthttp.StatusNotFound) return } if foundOid.DeleteMarker { log.Error("object was deleted") response.Error(req, "object deleted", fasthttp.StatusNotFound) return } var addr oid.Address addr.SetContainer(*cnrID) addr.SetObject(foundOid.OID) f(ctx, *d.newRequest(req, log), d.pool, addr) } // DownloadByAttribute handles attribute-based download requests. func (d *Downloader) DownloadByAttribute(c *fasthttp.RequestCtx) { d.byAttribute(c, receiveFile) } // byAttribute is a wrapper similar to byAddress. func (d *Downloader) byAttribute(c *fasthttp.RequestCtx, f func(context.Context, request, *pool.Pool, oid.Address)) { var ( scid, _ = c.UserValue("cid").(string) key, _ = url.QueryUnescape(c.UserValue("attr_key").(string)) val, _ = url.QueryUnescape(c.UserValue("attr_val").(string)) log = d.log.With(zap.String("cid", scid), zap.String("attr_key", key), zap.String("attr_val", val)) ) ctx := utils.GetContextFromRequest(c) containerID, err := utils.GetContainerID(ctx, scid, d.containerResolver) if err != nil { log.Error("wrong container id", zap.Error(err)) response.Error(c, "wrong container id", fasthttp.StatusBadRequest) return } res, err := d.search(ctx, containerID, key, val, object.MatchStringEqual) if err != nil { log.Error("could not search for objects", zap.Error(err)) response.Error(c, "could not search for objects: "+err.Error(), fasthttp.StatusBadRequest) return } defer res.Close() buf := make([]oid.ID, 1) n, err := res.Read(buf) if n == 0 { if errors.Is(err, io.EOF) { log.Error("object not found", zap.Error(err)) response.Error(c, "object not found", fasthttp.StatusNotFound) return } log.Error("read object list failed", zap.Error(err)) response.Error(c, "read object list failed: "+err.Error(), fasthttp.StatusBadRequest) return } var addrObj oid.Address addrObj.SetContainer(*containerID) addrObj.SetObject(buf[0]) f(ctx, *d.newRequest(c, log), d.pool, addrObj) } func (d *Downloader) search(ctx context.Context, cid *cid.ID, key, val string, op object.SearchMatchType) (pool.ResObjectSearch, error) { filters := object.NewSearchFilters() filters.AddRootFilter() filters.AddFilter(key, val, op) var prm pool.PrmObjectSearch prm.SetContainerID(*cid) prm.SetFilters(filters) if btoken := bearerToken(ctx); btoken != nil { prm.UseBearer(*btoken) } return d.pool.SearchObjects(ctx, prm) } func (d *Downloader) getContainer(ctx context.Context, cnrID cid.ID) (container.Container, error) { var prm pool.PrmContainerGet prm.SetContainerID(cnrID) return d.pool.GetContainer(ctx, prm) } func (d *Downloader) addObjectToZip(zw *zip.Writer, obj *object.Object) (io.Writer, error) { method := zip.Store if d.settings.ZipCompression() { method = zip.Deflate } filePath := getZipFilePath(obj) if len(filePath) == 0 || filePath[len(filePath)-1] == '/' { return nil, fmt.Errorf("invalid filepath '%s'", filePath) } return zw.CreateHeader(&zip.FileHeader{ Name: filePath, Method: method, Modified: time.Now(), }) } // DownloadZipped handles zip by prefix requests. func (d *Downloader) DownloadZipped(c *fasthttp.RequestCtx) { scid, _ := c.UserValue("cid").(string) prefix, _ := url.QueryUnescape(c.UserValue("prefix").(string)) log := d.log.With(zap.String("cid", scid), zap.String("prefix", prefix)) ctx := utils.GetContextFromRequest(c) containerID, err := utils.GetContainerID(ctx, scid, d.containerResolver) if err != nil { log.Error("wrong container id", zap.Error(err)) response.Error(c, "wrong container id", fasthttp.StatusBadRequest) return } // check if container exists here to be able to return 404 error, // otherwise we get this error only in object iteration step // and client get 200 OK. if _, err = d.getContainer(ctx, *containerID); err != nil { log.Error("could not check container existence", zap.Error(err)) if client.IsErrContainerNotFound(err) { response.Error(c, "Not Found", fasthttp.StatusNotFound) return } response.Error(c, "could not check container existence: "+err.Error(), fasthttp.StatusBadRequest) return } resSearch, err := d.search(ctx, containerID, object.AttributeFilePath, prefix, object.MatchCommonPrefix) if err != nil { log.Error("could not search for objects", zap.Error(err)) response.Error(c, "could not search for objects: "+err.Error(), fasthttp.StatusBadRequest) return } c.Response.Header.Set(fasthttp.HeaderContentType, "application/zip") c.Response.Header.Set(fasthttp.HeaderContentDisposition, "attachment; filename=\"archive.zip\"") c.Response.SetStatusCode(http.StatusOK) c.SetBodyStreamWriter(func(w *bufio.Writer) { defer resSearch.Close() zipWriter := zip.NewWriter(w) var bufZip []byte var addr oid.Address empty := true called := false btoken := bearerToken(ctx) addr.SetContainer(*containerID) errIter := resSearch.Iterate(func(id oid.ID) bool { called = true if empty { bufZip = make([]byte, 3<<20) // the same as for upload } empty = false addr.SetObject(id) if err = d.zipObject(ctx, zipWriter, addr, btoken, bufZip); err != nil { log.Error("failed to add object to archive", zap.String("oid", id.EncodeToString()), zap.Error(err)) } return false }) if errIter != nil { log.Error("iterating over selected objects failed", zap.Error(errIter)) } else if !called { log.Error("objects not found") } if err = zipWriter.Close(); err != nil { log.Error("close zip writer", zap.Error(err)) } }) } func (d *Downloader) zipObject(ctx context.Context, zipWriter *zip.Writer, addr oid.Address, btoken *bearer.Token, bufZip []byte) error { var prm pool.PrmObjectGet prm.SetAddress(addr) if btoken != nil { prm.UseBearer(*btoken) } resGet, err := d.pool.GetObject(ctx, prm) if err != nil { return fmt.Errorf("get FrostFS object: %v", err) } objWriter, err := d.addObjectToZip(zipWriter, &resGet.Header) if err != nil { return fmt.Errorf("zip create header: %v", err) } if _, err = io.CopyBuffer(objWriter, resGet.Payload, bufZip); err != nil { return fmt.Errorf("copy object payload to zip file: %v", err) } if err = resGet.Payload.Close(); err != nil { return fmt.Errorf("object body close error: %w", err) } if err = zipWriter.Flush(); err != nil { return fmt.Errorf("flush zip writer: %v", err) } return nil } func getZipFilePath(obj *object.Object) string { for _, attr := range obj.Attributes() { if attr.Key() == object.AttributeFilePath { return attr.Value() } } return "" }