[#137] Add index page support #141

Merged
alexvanin merged 1 commit from nzinkevich/frostfs-http-gw:feature/137/index_page into master 2024-10-04 14:52:16 +00:00
20 changed files with 738 additions and 80 deletions

View file

@ -6,6 +6,7 @@ import (
"crypto/x509"
"errors"
"fmt"
"io"
"net/http"
"os"
"os/signal"
@ -23,6 +24,7 @@ import (
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/handler"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/handler/middleware"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/templates"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/metrics"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/resolver"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/response"
@ -91,6 +93,8 @@ type (
defaultTimestamp bool
zipCompression bool
clientCut bool
dkirillov marked this conversation as resolved Outdated

Why do we need this field in appSettings?

Why do we need this field in `appSettings`?
returnIndexPage bool

Why do we actually need this template be reloadable?

Why do we actually need this template be reloadable?
https://git.frostfs.info/TrueCloudLab/frostfs-http-gw/pulls/141#issuecomment-51270

That comment doesn't answer about realoadability

That comment doesn't answer about realoadability

Why we should disable reloadability? Do you think that it's critical for reload performance?

Why we should disable reloadability? Do you think that it's critical for reload performance?
indexPageTemplate string
bufferMaxSizeForPut uint64
namespaceHeader string
defaultNamespaces []string
@ -155,6 +159,7 @@ func newApp(ctx context.Context, opt ...Option) App {
a.initResolver()
a.initMetrics()
a.initTracing(ctx)
a.loadIndexPageTemplate()
return a
}
@ -177,12 +182,59 @@ func (s *appSettings) ZipCompression() bool {
return s.zipCompression
}
func (s *appSettings) IndexPageEnabled() bool {
s.mu.RLock()
defer s.mu.RUnlock()
return s.returnIndexPage
}
func (s *appSettings) IndexPageTemplate() string {
s.mu.RLock()
defer s.mu.RUnlock()
if s.indexPageTemplate == "" {
return templates.DefaultIndexTemplate
}
return s.indexPageTemplate
}
func (s *appSettings) setZipCompression(val bool) {
s.mu.Lock()
s.zipCompression = val
s.mu.Unlock()
}
func (s *appSettings) setReturnIndexPage(val bool) {
s.mu.Lock()
s.returnIndexPage = val
s.mu.Unlock()
}
func (s *appSettings) setIndexTemplate(val string) {
s.mu.Lock()
s.indexPageTemplate = val
s.mu.Unlock()
}
func (a *app) loadIndexPageTemplate() {
if !a.settings.IndexPageEnabled() {
return
}
reader, err := os.Open(a.cfg.GetString(cfgIndexPageTemplatePath))
if err != nil {
a.settings.setIndexTemplate("")
a.log.Warn(logs.FailedToReadIndexPageTemplate, zap.Error(err))
return
}
tmpl, err := io.ReadAll(reader)
if err != nil {
a.settings.setIndexTemplate("")
a.log.Warn(logs.FailedToReadIndexPageTemplate, zap.Error(err))
return
}
a.settings.setIndexTemplate(string(tmpl))
a.log.Info(logs.SetCustomIndexPageTemplate)
}
func (s *appSettings) ClientCut() bool {
s.mu.RLock()
defer s.mu.RUnlock()
@ -491,6 +543,7 @@ func (a *app) configReload(ctx context.Context) {
a.metrics.SetEnabled(a.cfg.GetBool(cfgPrometheusEnabled))
a.initTracing(ctx)
a.loadIndexPageTemplate()
a.setHealthStatus()
a.log.Info(logs.SIGHUPConfigReloadCompleted)
@ -499,6 +552,7 @@ func (a *app) configReload(ctx context.Context) {
func (a *app) updateSettings() {
a.settings.setDefaultTimestamp(a.cfg.GetBool(cfgUploaderHeaderEnableDefaultTimestamp))
a.settings.setZipCompression(a.cfg.GetBool(cfgZipCompression))
a.settings.setReturnIndexPage(a.cfg.GetBool(cfgIndexPageEnabled))
a.settings.setClientCut(a.cfg.GetBool(cfgClientCut))
a.settings.setBufferMaxSizeForPut(a.cfg.GetUint64(cfgBufferMaxSizeForPut))
a.settings.setNamespaceHeader(a.cfg.GetString(cfgResolveNamespaceHeader))

View file

@ -62,6 +62,9 @@ const (
cfgReconnectInterval = "reconnect_interval"
cfgIndexPageEnabled = "index_page.enabled"
cfgIndexPageTemplatePath = "index_page.template_path"
// Web.
cfgWebReadBufferSize = "web.read_buffer_size"
cfgWebWriteBufferSize = "web.write_buffer_size"
@ -203,6 +206,9 @@ func settings() *viper.Viper {
// pool:
v.SetDefault(cfgPoolErrorThreshold, defaultPoolErrorThreshold)
v.SetDefault(cfgIndexPageEnabled, false)
v.SetDefault(cfgIndexPageTemplatePath, "")
// frostfs:
v.SetDefault(cfgBufferMaxSizeForPut, defaultBufferMaxSizeForPut)

View file

@ -107,6 +107,11 @@ request_timeout: 5s # Timeout to check node health during rebalance.
rebalance_timer: 30s # Interval to check nodes health.
dkirillov marked this conversation as resolved Outdated

Maybe it's better to provide path to real default config? internal/handler/templates/index.gotmpl

Maybe it's better to provide path to real default config? `internal/handler/templates/index.gotmpl`
pool_error_threshold: 100 # The number of errors on connection after which node is considered as unhealthy.
# Enable index page to see objects list for specified container and prefix
index_page:
enabled: false
template_path: internal/handler/templates/index.gotmpl
zip:
compression: false # Enable zip compression to download files by common prefix.
@ -132,4 +137,4 @@ cache:
resolve_bucket:
namespace_header: X-Frostfs-Namespace
default_namespaces: [ "", "root" ]
default_namespaces: [ "", "root" ]

View file

@ -95,12 +95,12 @@ The `filename` field from the multipart form will be set as `FileName` attribute
## Get object
Route: `/get/{cid}/{oid}?[download=true]`
Route: `/get/{cid}/{oid}?[download=false]`
| Route parameter | Type | Description |
|-----------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `cid` | Single | Base58 encoded container ID or container name from NNS. |
| `oid` | Single | Base58 encoded object ID. |
| `cid` | Single | Base58 encoded `container ID` or `container name` from NNS or `bucket name`. |
| `oid` | Single | Base58 encoded `object ID`. Also could be `S3 object name` if `cid` is specified as bucket name. |
| `download` | Query | Set the `Content-Disposition` header as `attachment` in response.<br/> This make the browser to download object as file instead of showing it on the page. |
### Methods
@ -141,6 +141,13 @@ Get an object (payload and attributes) by an address.
| 400 | Some error occurred during object downloading. |
| 404 | Container or object not found. |
###### Body
Returns object data. If request performed from browser, either displays raw data or downloads it as
attachment if `download` query parameter is set to `true`.
If `index_page.enabled` is set to `true`, returns HTML with index-page if no object with specified
S3-name was found.
#### HEAD
Get an object attributes by an address.

View file

@ -57,6 +57,7 @@ $ cat http.log
| `frostfs` | [Frostfs configuration](#frostfs-section) |
| `cache` | [Cache configuration](#cache-section) |
| `resolve_bucket` | [Bucket name resolving configuration](#resolve_bucket-section) |
| `index_page` | [Index page configuration](#index_page-section) |
# General section
@ -75,16 +76,16 @@ pool_error_threshold: 100
reconnect_interval: 1m
```
| Parameter | Type | SIGHUP reload | Default value | Description |
|------------------------|------------|---------------|----------------|------------------------------------------------------------------------------------|
| `rpc_endpoint` | `string` | yes | | The address of the RPC host to which the gateway connects to resolve bucket names. |
| `resolve_order` | `[]string` | yes | `[nns, dns]` | Order of bucket name resolvers to use. |
| `connect_timeout` | `duration` | | `10s` | Timeout to connect to a node. |
| `stream_timeout` | `duration` | | `10s` | Timeout for individual operations in streaming RPC. |
| `request_timeout` | `duration` | | `15s` | Timeout to check node health during rebalance. |
| `rebalance_timer` | `duration` | | `60s` | Interval to check node health. |
| `pool_error_threshold` | `uint32` | | `100` | The number of errors on connection after which node is considered as unhealthy. |
| `reconnect_interval` | `duration` | no | `1m` | Listeners reconnection interval. |
| Parameter | Type | SIGHUP reload | Default value | Description |
|------------------------|------------|---------------|---------------|-------------------------------------------------------------------------------------------------|
| `rpc_endpoint` | `string` | yes | | The address of the RPC host to which the gateway connects to resolve bucket names. |
| `resolve_order` | `[]string` | yes | `[nns, dns]` | Order of bucket name resolvers to use. |
| `connect_timeout` | `duration` | | `10s` | Timeout to connect to a node. |
| `stream_timeout` | `duration` | | `10s` | Timeout for individual operations in streaming RPC. |
| `request_timeout` | `duration` | | `15s` | Timeout to check node health during rebalance. |
| `rebalance_timer` | `duration` | | `60s` | Interval to check node health. |
| `pool_error_threshold` | `uint32` | | `100` | The number of errors on connection after which node is considered as unhealthy. |
| `reconnect_interval` | `duration` | no | `1m` | Listeners reconnection interval. |
# `wallet` section
@ -346,4 +347,19 @@ resolve_bucket:
| Parameter | Type | SIGHUP reload | Default value | Description |
|----------------------|------------|---------------|-----------------------|--------------------------------------------------------------------------------------------------------------------------|
| `namespace_header` | `string` | yes | `X-Frostfs-Namespace` | Header to determine zone to resolve bucket name. |
| `default_namespaces` | `[]string` | yes | ["","root"] | Namespaces that should be handled as default. |
| `default_namespaces` | `[]string` | yes | ["","root"] | Namespaces that should be handled as default. |
# `index_page` section
Parameters for index HTML-page output with S3-bucket or S3-subdir content for `Get object` request
```yaml
index_page:
enabled: false
template_path: ""
```
| Parameter | Type | SIGHUP reload | Default value | Description |
|-----------------|----------|---------------|---------------|---------------------------------------------------------------------------------|
| `enabled` | `bool` | yes | `false` | Flag to enable index_page return if no object with specified S3-name was found. |
| `template_path` | `string` | yes | `""` | Path to .gotmpl file with html template for index_page. |

2
go.mod
View file

@ -8,6 +8,7 @@ require (
git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20240918095938-e580ee991d98
git.frostfs.info/TrueCloudLab/zapjournald v0.0.0-20240124114243-cb2e66427d02
github.com/bluele/gcache v0.0.2
github.com/docker/go-units v0.4.0
github.com/fasthttp/router v1.4.1
github.com/nspcc-dev/neo-go v0.106.2
github.com/prometheus/client_golang v1.19.0
@ -50,7 +51,6 @@ require (
github.com/docker/distribution v2.8.1+incompatible // indirect
github.com/docker/docker v20.10.14+incompatible // indirect
github.com/docker/go-connections v0.4.0 // indirect
github.com/docker/go-units v0.4.0 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect

View file

@ -8,6 +8,7 @@ import (
type NodeVersion struct {
BaseNodeVersion
DeleteMarker bool
IsPrefixNode bool
}
// BaseNodeVersion is minimal node info from tree service.

View file

@ -4,7 +4,9 @@ import (
"context"
"errors"
"fmt"
"io"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tokens"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tree"
treepool "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pool/tree"
@ -15,16 +17,16 @@ type GetNodeByPathResponseInfoWrapper struct {
response *grpcService.GetNodeByPathResponse_Info
}
func (n GetNodeByPathResponseInfoWrapper) GetNodeID() uint64 {
return n.response.GetNodeId()
func (n GetNodeByPathResponseInfoWrapper) GetNodeID() []uint64 {
return []uint64{n.response.GetNodeId()}
}
func (n GetNodeByPathResponseInfoWrapper) GetParentID() uint64 {
return n.response.GetParentId()
func (n GetNodeByPathResponseInfoWrapper) GetParentID() []uint64 {
alexvanin marked this conversation as resolved Outdated

I suggest to synchronize this also with s3-gw and return []uint64. We did this to fix some tree service split issues. We would like to keep this code similar.

I suggest to synchronize this also with s3-gw and return `[]uint64`. We did this to fix some tree service split issues. We would like to keep this code similar.
return []uint64{n.response.GetParentId()}
}
func (n GetNodeByPathResponseInfoWrapper) GetTimestamp() uint64 {
return n.response.GetTimestamp()
func (n GetNodeByPathResponseInfoWrapper) GetTimestamp() []uint64 {
return []uint64{n.response.GetTimestamp()}
}
func (n GetNodeByPathResponseInfoWrapper) GetMeta() []tree.Meta {
@ -89,3 +91,73 @@ func handleError(err error) error {
return err
}
func (w *PoolWrapper) GetSubTree(ctx context.Context, bktInfo *data.BucketInfo, treeID string, rootID []uint64, depth uint32, sort bool) ([]tree.NodeResponse, error) {
order := treepool.NoneOrder
if sort {
order = treepool.AscendingOrder
}
poolPrm := treepool.GetSubTreeParams{
CID: bktInfo.CID,
TreeID: treeID,
RootID: rootID,
Depth: depth,
BearerToken: getBearer(ctx),
Order: order,
}
if len(rootID) == 1 && rootID[0] == 0 {
// storage node interprets 'nil' value as []uint64{0}
// gate wants to send 'nil' value instead of []uint64{0}, because
// it provides compatibility with previous tree service api where
// single uint64(0) value is dropped from signature
poolPrm.RootID = nil
}
subTreeReader, err := w.p.GetSubTree(ctx, poolPrm)
if err != nil {
return nil, handleError(err)
}
var subtree []tree.NodeResponse
node, err := subTreeReader.Next()
for err == nil {
subtree = append(subtree, GetSubTreeResponseBodyWrapper{node})
node, err = subTreeReader.Next()
}
if err != io.EOF {
return nil, handleError(err)
}
return subtree, nil
}
type GetSubTreeResponseBodyWrapper struct {
response *grpcService.GetSubTreeResponse_Body
}
func (n GetSubTreeResponseBodyWrapper) GetNodeID() []uint64 {
return n.response.GetNodeId()
}
func (n GetSubTreeResponseBodyWrapper) GetParentID() []uint64 {
resp := n.response.GetParentId()
if resp == nil {
// storage sends nil that should be interpreted as []uint64{0}
// due to protobuf compatibility, see 'GetSubTree' function
return []uint64{0}
}
return resp
}
func (n GetSubTreeResponseBodyWrapper) GetTimestamp() []uint64 {
return n.response.GetTimestamp()
}
func (n GetSubTreeResponseBodyWrapper) GetMeta() []tree.Meta {
res := make([]tree.Meta, len(n.response.Meta))
for i, value := range n.response.Meta {
res[i] = value
}
return res
}

157
internal/handler/browse.go Normal file
View file

@ -0,0 +1,157 @@
package handler
import (
"html/template"
"net/url"
"sort"
"strconv"
"strings"
"time"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/utils"
"github.com/docker/go-units"
"github.com/valyala/fasthttp"
"go.uber.org/zap"
)
const (
dateFormat = "02-01-2006 15:04"
attrOID = "OID"
attrCreated = "Created"
dkirillov marked this conversation as resolved Outdated

Let's keep each constant on separate line

Let's keep each constant on separate line
attrFileName = "FileName"
attrSize = "Size"
)
type (
BrowsePageData struct {
BucketName,
Prefix string
Objects []ResponseObject
}
ResponseObject struct {
OID string
Created string
FileName string
Size string
IsDir bool
}
)
alexvanin marked this conversation as resolved Outdated

I suggest to move default setting out of handler and put it into configuration of app.go

This way we will have single place in application that manages actual template string. It will be easier to manage it later.

What you think?

I suggest to move default setting out of handler and put it into configuration of app.go This way we will have single place in application that manages actual template string. It will be easier to manage it later. What you think?

If we move default template init to app.go, it wouldn't be so nice because go:embed allows only link files which is relative to current package (and prohibits using .. in path). Thus we should move (or at least symlink) template file to cmd/http-gw as well which I think we shouldn't do

If we move default template init to app.go, it wouldn't be so nice because `go:embed` allows only link files which is relative to current package (and prohibits using `..` in path). Thus we should move (or at least symlink) template file to cmd/http-gw as well which I think we shouldn't do

But we still can introduce separate package for reading default template. And use it in app.go

But we still can introduce separate package for reading default template. And use it in app.go
func parseTimestamp(tstamp string) (time.Time, error) {
millis, err := strconv.ParseInt(tstamp, 10, 64)
if err != nil {
return time.Time{}, err
}
return time.UnixMilli(millis), nil
}
func NewResponseObject(nodes map[string]string) ResponseObject {
return ResponseObject{
OID: nodes[attrOID],
Created: nodes[attrCreated],
FileName: nodes[attrFileName],
Size: nodes[attrSize],
IsDir: nodes[attrOID] == "",
}
}
func formatTimestamp(strdate string) string {
date, err := parseTimestamp(strdate)
if err != nil || date.IsZero() {
return ""
}
return date.Format(dateFormat)
}
func formatSize(strsize string) string {
size, err := strconv.ParseFloat(strsize, 64)
if err != nil {
alexvanin marked this conversation as resolved Outdated

Can we write direct converter from nodes to respObjects without JSON as a middle step?

Can we write direct converter from `nodes` to `respObjects` without JSON as a middle step?
return "0B"
}
return units.HumanSize(size)
}
func parentDir(prefix string) string {
index := strings.LastIndex(prefix, "/")
if index == -1 {
return prefix
}
return prefix[index:]
}
func trimPrefix(encPrefix string) string {
prefix, err := url.PathUnescape(encPrefix)
if err != nil {
return ""
}
slashIndex := strings.LastIndex(prefix, "/")
if slashIndex == -1 {
dkirillov marked this conversation as resolved Outdated

It's better to write:

log := h.log.With(zap.String("bucket", bucketName))
It's better to write: ```golang log := h.log.With(zap.String("bucket", bucketName)) ```
return ""
}

If len(nodes) == 0 probably we should return not found error

If `len(nodes) == 0` probably we should return not found error

I think we should return NotFound by default not only in that case but each time we browsing objects. Otherwise we would violate API

I think we should return NotFound by default not only in that case but each time we browsing objects. Otherwise we would violate API

Could you clarify what do you mean?

By the way, currently we don't return not found if provided path ends with / and doesn't exist

Could you clarify what do you mean? By the way, currently we don't return `not found` if provided path ends with `/` and doesn't exist

I mean that index page returns html with status code 200 in places, where API should return 404. If so, enabling index page would cause API requests to return incorrect status codes.
At first I thought that all index pages should return 404.
But it turns out, that some folders would return 200. For example, if we create object with key /foo/bar/, request /get/bucketName/foo/bar/ returns 200, but /get/bucketName/foo/ returns 404 (because foo/ stored only in a tree service I suppose)

I mean that index page returns html with status code 200 in places, where API should return 404. If so, enabling index page would cause API requests to return incorrect status codes. At first I thought that all index pages should return 404. But it turns out, that some folders would return 200. For example, if we create object with key `/foo/bar/`, request `/get/bucketName/foo/bar/` returns 200, but `/get/bucketName/foo/` returns 404 (because `foo/` stored only in a tree service I suppose)
return prefix[:slashIndex]
}
func urlencode(prefix, filename string) string {
var res strings.Builder
path := filename
if prefix != "" {
path = strings.Join([]string{prefix, filename}, "/")
}
prefixParts := strings.Split(path, "/")
for _, prefixPart := range prefixParts {
prefixPart = "/" + url.PathEscape(prefixPart)
if prefixPart == "/." || prefixPart == "/.." {
prefixPart = url.PathEscape(prefixPart)
}
res.WriteString(prefixPart)
}
return res.String()
}
func (h *Handler) browseObjects(c *fasthttp.RequestCtx, bucketInfo *data.BucketInfo, prefix string) {
log := h.log.With(zap.String("bucket", bucketInfo.Name))
ctx := utils.GetContextFromRequest(c)
nodes, err := h.listObjects(ctx, bucketInfo, prefix)
if err != nil {
logAndSendBucketError(c, log, err)
return
}
respObjects := make([]ResponseObject, len(nodes))
for i, node := range nodes {
respObjects[i] = NewResponseObject(node)
}
sort.Slice(respObjects, func(i, j int) bool {
if respObjects[i].IsDir == respObjects[j].IsDir {
return respObjects[i].FileName < respObjects[j].FileName
}
return respObjects[i].IsDir
})
indexTemplate := h.config.IndexPageTemplate()
tmpl, err := template.New("index").Funcs(template.FuncMap{
"formatTimestamp": formatTimestamp,
"formatSize": formatSize,
"trimPrefix": trimPrefix,
"urlencode": urlencode,
"parentDir": parentDir,
}).Parse(indexTemplate)
if err != nil {
logAndSendBucketError(c, log, err)
return
}
if err = tmpl.Execute(c, &BrowsePageData{
BucketName: bucketInfo.Name,
Prefix: prefix,
Objects: respObjects,
}); err != nil {
logAndSendBucketError(c, log, err)
return
}
}

View file

@ -30,6 +30,8 @@ type Config interface {
DefaultTimestamp() bool
ZipCompression() bool
ClientCut() bool
IndexPageEnabled() bool
IndexPageTemplate() string
alexvanin marked this conversation as resolved Outdated

This interface method is unused. Remove it.

This interface method is unused. Remove it.
BufferMaxSizeForPut() uint64
NamespaceHeader() string
}
@ -208,41 +210,50 @@ func (h *Handler) byAddress(c *fasthttp.RequestCtx, f func(context.Context, requ
// byObjectName is a wrapper for function (e.g. request.headObject, request.receiveFile) that
// prepares request and object address to it.
func (h *Handler) byObjectName(req *fasthttp.RequestCtx, f func(context.Context, request, oid.Address)) {
func (h *Handler) byObjectName(c *fasthttp.RequestCtx, f func(context.Context, request, oid.Address)) {
var (
bucketname = req.UserValue("cid").(string)
key = req.UserValue("oid").(string)
bucketname = c.UserValue("cid").(string)
key = c.UserValue("oid").(string)
log = h.log.With(zap.String("bucketname", bucketname), zap.String("key", key))
download = c.QueryArgs().GetBool("download")
)
unescapedKey, err := url.QueryUnescape(key)
if err != nil {
logAndSendBucketError(req, log, err)
logAndSendBucketError(c, log, err)
return
}
ctx := utils.GetContextFromRequest(req)
ctx := utils.GetContextFromRequest(c)
bktInfo, err := h.getBucketInfo(ctx, bucketname, log)
if err != nil {
logAndSendBucketError(req, log, err)
logAndSendBucketError(c, log, err)
return
}
foundOid, err := h.tree.GetLatestVersion(ctx, &bktInfo.CID, unescapedKey)
if h.config.IndexPageEnabled() && !download && string(c.Method()) != fasthttp.MethodHead {
if isDir(unescapedKey) || isContainerRoot(unescapedKey) {
if code := checkErrorType(err); code == fasthttp.StatusNotFound || code == fasthttp.StatusOK {
c.SetStatusCode(code)
h.browseObjects(c, bktInfo, unescapedKey)
return
}
}
}
if err != nil {
if errors.Is(err, tree.ErrNodeAccessDenied) {
response.Error(req, "Access Denied", fasthttp.StatusForbidden)
return
response.Error(c, "Access Denied", fasthttp.StatusForbidden)
} else {
response.Error(c, "object wasn't found", fasthttp.StatusNotFound)
log.Error(logs.GetLatestObjectVersion, zap.Error(err))
}
log.Error(logs.GetLatestObjectVersion, zap.Error(err))
response.Error(req, "object wasn't found", fasthttp.StatusNotFound)
return
}
if foundOid.DeleteMarker {
log.Error(logs.ObjectWasDeleted)
response.Error(req, "object deleted", fasthttp.StatusNotFound)
response.Error(c, "object deleted", fasthttp.StatusNotFound)
return
}
@ -250,7 +261,7 @@ func (h *Handler) byObjectName(req *fasthttp.RequestCtx, f func(context.Context,
addr.SetContainer(bktInfo.CID)
addr.SetObject(foundOid.OID)
f(ctx, *h.newRequest(req, log), addr)
f(ctx, *h.newRequest(c, log), addr)
}
// byAttribute is a wrapper similar to byAddress.
@ -379,3 +390,25 @@ func (h *Handler) readContainer(ctx context.Context, cnrID cid.ID) (*data.Bucket
return bktInfo, err
}
dkirillov marked this conversation as resolved Outdated

It's better to write:

log := h.log.With(zap.String("bucket", bucketName))
It's better to write: ```golang log := h.log.With(zap.String("bucket", bucketName)) ```
func (h *Handler) listObjects(ctx context.Context, bucketInfo *data.BucketInfo, prefix string) ([]map[string]string, error) {
nodes, _, err := h.tree.GetSubTreeByPrefix(ctx, bucketInfo, prefix, true)
if err != nil {
return nil, err
}
var objects = make([]map[string]string, 0, len(nodes))
for _, node := range nodes {
meta := node.GetMeta()
if meta == nil {
continue
}
var obj = make(map[string]string, len(meta))
for _, m := range meta {
obj[m.GetKey()] = string(m.GetValue())
}
objects = append(objects, obj)
}
return objects, nil
}

View file

@ -12,6 +12,7 @@ import (
"time"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/cache"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/handler/middleware"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/resolver"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tree"
@ -37,6 +38,10 @@ func (t *treeClientMock) GetNodes(context.Context, *tree.GetNodesParams) ([]tree
return nil, nil
}
func (t *treeClientMock) GetSubTree(context.Context, *data.BucketInfo, string, []uint64, uint32, bool) ([]tree.NodeResponse, error) {
return nil, nil
}
type configMock struct {
}
@ -48,6 +53,17 @@ func (c *configMock) ZipCompression() bool {
return false
}
func (c *configMock) IndexPageEnabled() bool {
return false
}
func (c *configMock) IndexPageTemplatePath() string {
return ""
}
func (c *configMock) IndexPageTemplate() string {
return ""
}
func (c *configMock) ClientCut() bool {
return false
}

View file

@ -53,6 +53,7 @@ func (h *Handler) receiveFile(ctx context.Context, req request, objectAddress oi
dis = "inline"
start = time.Now()
filename string
filepath string
)
prm := PrmObjectGet{
@ -104,6 +105,8 @@ func (h *Handler) receiveFile(ctx context.Context, req request, objectAddress oi
time.Unix(value, 0).UTC().Format(http.TimeFormat))
case object.AttributeContentType:
contentType = val
case object.AttributeFilePath:
filepath = val
}
}
@ -135,6 +138,10 @@ func (h *Handler) receiveFile(ctx context.Context, req request, objectAddress oi
}
req.SetContentType(contentType)
if filename == "" {
filename = filepath
}
req.Response.Header.Set(fasthttp.HeaderContentDisposition, dis+"; filename="+path.Base(filename))
req.Response.SetBodyStream(rObj.Payload, int(payloadSize))

View file

@ -43,22 +43,22 @@ func (pr *putResponse) encode(w io.Writer) error {
}
// Upload handles multipart upload request.
func (h *Handler) Upload(req *fasthttp.RequestCtx) {
func (h *Handler) Upload(c *fasthttp.RequestCtx) {
var (
file MultipartFile
idObj oid.ID
addr oid.Address
scid, _ = req.UserValue("cid").(string)
scid, _ = c.UserValue("cid").(string)
log = h.log.With(zap.String("cid", scid))
bodyStream = req.RequestBodyStream()
bodyStream = c.RequestBodyStream()
drainBuf = make([]byte, drainBufSize)
)
ctx := utils.GetContextFromRequest(req)
ctx := utils.GetContextFromRequest(c)
bktInfo, err := h.getBucketInfo(ctx, scid, log)
if err != nil {
logAndSendBucketError(req, log, err)
logAndSendBucketError(c, log, err)
return
}
@ -75,21 +75,21 @@ func (h *Handler) Upload(req *fasthttp.RequestCtx) {
zap.Error(err),
)
}()
boundary := string(req.Request.Header.MultipartFormBoundary())
boundary := string(c.Request.Header.MultipartFormBoundary())
if file, err = fetchMultipartFile(h.log, bodyStream, boundary); err != nil {
log.Error(logs.CouldNotReceiveMultipartForm, zap.Error(err))
response.Error(req, "could not receive multipart/form: "+err.Error(), fasthttp.StatusBadRequest)
response.Error(c, "could not receive multipart/form: "+err.Error(), fasthttp.StatusBadRequest)
return
}
filtered, err := filterHeaders(h.log, &req.Request.Header)
filtered, err := filterHeaders(h.log, &c.Request.Header)
if err != nil {
log.Error(logs.CouldNotProcessHeaders, zap.Error(err))
response.Error(req, err.Error(), fasthttp.StatusBadRequest)
response.Error(c, err.Error(), fasthttp.StatusBadRequest)
return
}
now := time.Now()
if rawHeader := req.Request.Header.Peek(fasthttp.HeaderDate); rawHeader != nil {
if rawHeader := c.Request.Header.Peek(fasthttp.HeaderDate); rawHeader != nil {
if parsed, err := time.Parse(http.TimeFormat, string(rawHeader)); err != nil {
log.Warn(logs.CouldNotParseClientTime, zap.String("Date header", string(rawHeader)), zap.Error(err))
} else {
@ -97,9 +97,9 @@ func (h *Handler) Upload(req *fasthttp.RequestCtx) {
}
}
if err = utils.PrepareExpirationHeader(req, h.frostfs, filtered, now); err != nil {
if err = utils.PrepareExpirationHeader(c, h.frostfs, filtered, now); err != nil {
log.Error(logs.CouldNotPrepareExpirationHeader, zap.Error(err))
response.Error(req, "could not prepare expiration header: "+err.Error(), fasthttp.StatusBadRequest)
response.Error(c, "could not prepare expiration header: "+err.Error(), fasthttp.StatusBadRequest)
return
}
@ -143,7 +143,7 @@ func (h *Handler) Upload(req *fasthttp.RequestCtx) {
}
if idObj, err = h.frostfs.CreateObject(ctx, prm); err != nil {
h.handlePutFrostFSErr(req, err)
h.handlePutFrostFSErr(c, err)
return
}
@ -151,9 +151,9 @@ func (h *Handler) Upload(req *fasthttp.RequestCtx) {
addr.SetContainer(bktInfo.CID)
// Try to return the response, otherwise, if something went wrong, throw an error.
if err = newPutResponse(addr).encode(req); err != nil {
if err = newPutResponse(addr).encode(c); err != nil {
log.Error(logs.CouldNotEncodeResponse, zap.Error(err))
response.Error(req, "could not encode response", fasthttp.StatusBadRequest)
response.Error(c, "could not encode response", fasthttp.StatusBadRequest)
return
}
@ -170,8 +170,8 @@ func (h *Handler) Upload(req *fasthttp.RequestCtx) {
}
}
// Report status code and content type.
req.Response.SetStatusCode(fasthttp.StatusOK)
req.Response.Header.SetContentType(jsonHeader)
c.Response.SetStatusCode(fasthttp.StatusOK)
c.Response.Header.SetContentType(jsonHeader)
}
func (h *Handler) handlePutFrostFSErr(r *fasthttp.RequestCtx, err error) {

View file

@ -2,12 +2,14 @@ package handler
import (
"context"
"errors"
"strings"
"time"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/response"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tokens"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/tree"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/bearer"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client"
"github.com/valyala/fasthttp"
@ -38,6 +40,25 @@ func bearerToken(ctx context.Context) *bearer.Token {
return nil
}
func isDir(name string) bool {
return strings.HasSuffix(name, "/")
}
func isContainerRoot(key string) bool {
return key == ""
}
func checkErrorType(err error) int {
switch {
case err == nil:
return fasthttp.StatusOK
case errors.Is(err, tree.ErrNodeAccessDenied):
return fasthttp.StatusForbidden
default:
return fasthttp.StatusNotFound
}
}
func isValidToken(s string) bool {
for _, c := range s {
if c <= ' ' || c > 127 {

View file

@ -31,6 +31,8 @@ const (
CouldNotStoreFileInFrostfs = "could not store file in frostfs" // Error in ../../uploader/upload.go
AddAttributeToResultObject = "add attribute to result object" // Debug in ../../uploader/filter.go
FailedToCreateResolver = "failed to create resolver" // Fatal in ../../app.go
FailedToReadIndexPageTemplate = "failed to read index page template, set default" // Warn in ../../app.go
SetCustomIndexPageTemplate = "set custom index page template" // Info in ../../app.go
ContainerResolverWillBeDisabledBecauseOfResolversResolverOrderIsEmpty = "container resolver will be disabled because of resolvers 'resolver_order' is empty" // Info in ../../app.go
MetricsAreDisabled = "metrics are disabled" // Warn in ../../app.go
NoWalletPathSpecifiedCreatingEphemeralKeyAutomaticallyForThisRun = "no wallet path specified, creating ephemeral key automatically for this run" // Info in ../../app.go

View file

@ -0,0 +1,90 @@
{{$bucketName := .BucketName}}
{{ $prefix := trimPrefix .Prefix }}
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<title>Index of s3://{{$bucketName}}/{{if $prefix}}/{{$prefix}}/{{end}}</title>
<style>
table {
width: 80%;
border-collapse: collapse;
}
body {
background: #f2f2f2;
}
table, th, td {
border: 0 solid transparent;
}
th, td {
padding: 10px;
text-align: left;
}
th {
background-color: #c3bcbc;
}
tr:nth-child(even) {background-color: #ebe7e7;}
</style>
</head>
<body>
<h1>Index of s3://{{$bucketName}}/{{if $prefix}}{{$prefix}}/{{end}}</h1>
<table>
<thead>
<tr>
<th>Filename</th>
<th>Size</th>
<th>Created</th>

We must add fourth (Download for example)

We must add fourth <th> (`Download` for example)
<th>Download</th>
</tr>
</thead>
<tbody>
{{ $trimmedPrefix := trimPrefix $prefix }}
{{if $trimmedPrefix }}
<tr>
<td>
⮐<a href="/get/{{$bucketName}}{{ urlencode $trimmedPrefix "" }}">..</a>
</td>
<td></td>
<td></td>
<td></td>
</tr>
{{else}}
<tr>
<td>
⮐<a href="/get/{{ $bucketName }}/">..</a>
</td>
<td></td>
<td></td>
<td></td>
</tr>
{{end}}
{{range .Objects}}
<tr>
<td>
{{if .IsDir}}
🗀
<a href="/get/{{ $bucketName }}{{ urlencode $prefix .FileName }}/">
{{.FileName}}/
</a>
{{else}}
🗎
<a href="/get/{{ $bucketName }}{{ urlencode $prefix .FileName }}">
{{.FileName}}
</a>
{{end}}
</td>
<td>{{if not .IsDir}}{{ formatSize .Size }}{{end}}</td>
<td>{{if not .IsDir}}{{ formatTimestamp .Created }}{{end}}</td>
<td>
{{ if not .IsDir }}
<a href="/get/{{ $bucketName}}{{ urlencode $prefix .FileName }}?download=true">
Link
</a>
{{ end }}
</td>
</tr>
{{end}}
</tbody>
</table>
</body>
</html>

View file

@ -0,0 +1,6 @@
package templates
import _ "embed"
//go:embed index.gotmpl
var DefaultIndexTemplate string

View file

@ -52,8 +52,8 @@ func BearerTokenFromCookie(h *fasthttp.RequestHeader) []byte {
// StoreBearerTokenAppCtx extracts a bearer token from the header or cookie and stores
// it in the application context.
func StoreBearerTokenAppCtx(ctx context.Context, req *fasthttp.RequestCtx) (context.Context, error) {
tkn, err := fetchBearerToken(req)
func StoreBearerTokenAppCtx(ctx context.Context, c *fasthttp.RequestCtx) (context.Context, error) {
tkn, err := fetchBearerToken(c)
if err != nil {
return nil, err
}

View file

@ -2,11 +2,13 @@ package tree
import (
"context"
"errors"
"fmt"
"strings"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/api"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/api/layer"
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/data"
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
)
@ -20,6 +22,7 @@ type (
// Each method must return ErrNodeNotFound or ErrNodeAccessDenied if relevant.
ServiceClient interface {
GetNodes(ctx context.Context, p *GetNodesParams) ([]NodeResponse, error)
GetSubTree(ctx context.Context, bktInfo *data.BucketInfo, treeID string, rootID []uint64, depth uint32, sort bool) ([]NodeResponse, error)
}
treeNode struct {
@ -29,6 +32,7 @@ type (
GetNodesParams struct {
CnrID cid.ID
BktInfo *data.BucketInfo
TreeID string
Path []string
Meta []string
@ -54,6 +58,7 @@ const (
// keys for delete marker nodes.
isDeleteMarkerKV = "IsDeleteMarker"
sizeKV = "Size"
// versionTree -- ID of a tree with object versions.
versionTree = "version"
@ -73,26 +78,28 @@ type Meta interface {
type NodeResponse interface {
GetMeta() []Meta
GetTimestamp() uint64
GetTimestamp() []uint64
GetNodeID() []uint64
GetParentID() []uint64
}
alexvanin marked this conversation as resolved Outdated

Unused?

Unused?
func newTreeNode(nodeInfo NodeResponse) (*treeNode, error) {
treeNode := &treeNode{
tNode := &treeNode{
Meta: make(map[string]string, len(nodeInfo.GetMeta())),
}
for _, kv := range nodeInfo.GetMeta() {
switch kv.GetKey() {
case oidKV:
if err := treeNode.ObjID.DecodeString(string(kv.GetValue())); err != nil {
if err := tNode.ObjID.DecodeString(string(kv.GetValue())); err != nil {
return nil, err
}
default:
treeNode.Meta[kv.GetKey()] = string(kv.GetValue())
tNode.Meta[kv.GetKey()] = string(kv.GetValue())
}
}
return treeNode, nil
return tNode, nil
}
func (n *treeNode) Get(key string) (string, bool) {
@ -106,29 +113,44 @@ func (n *treeNode) FileName() (string, bool) {
}
func newNodeVersion(node NodeResponse) (*api.NodeVersion, error) {
treeNode, err := newTreeNode(node)
tNode, err := newTreeNode(node)
if err != nil {
return nil, fmt.Errorf("invalid tree node: %w", err)
}
return newNodeVersionFromTreeNode(treeNode), nil
return newNodeVersionFromTreeNode(tNode), nil
}
func newNodeVersionFromTreeNode(treeNode *treeNode) *api.NodeVersion {
_, isDeleteMarker := treeNode.Get(isDeleteMarkerKV)
size, _ := treeNode.Get(sizeKV)
version := &api.NodeVersion{
BaseNodeVersion: api.BaseNodeVersion{
OID: treeNode.ObjID,
},
DeleteMarker: isDeleteMarker,
IsPrefixNode: size == "",
}
return version
}
func (c *Tree) GetLatestVersion(ctx context.Context, cnrID *cid.ID, objectName string) (*api.NodeVersion, error) {
meta := []string{oidKV, isDeleteMarkerKV}
nodes, err := c.GetVersions(ctx, cnrID, objectName)
if err != nil {
return nil, err
}
latestNode, err := getLatestVersionNode(nodes)
if err != nil {
return nil, err
}
return newNodeVersion(latestNode)
}
func (c *Tree) GetVersions(ctx context.Context, cnrID *cid.ID, objectName string) ([]NodeResponse, error) {
meta := []string{oidKV, isDeleteMarkerKV, sizeKV}
path := pathFromName(objectName)
p := &GetNodesParams{
@ -139,30 +161,24 @@ func (c *Tree) GetLatestVersion(ctx context.Context, cnrID *cid.ID, objectName s
LatestOnly: false,
AllAttrs: false,
}
nodes, err := c.service.GetNodes(ctx, p)
if err != nil {
return nil, err
}
latestNode, err := getLatestNode(nodes)
if err != nil {
return nil, err
}
return newNodeVersion(latestNode)
return c.service.GetNodes(ctx, p)
}
func getLatestNode(nodes []NodeResponse) (NodeResponse, error) {
func getLatestVersionNode(nodes []NodeResponse) (NodeResponse, error) {
var (
maxCreationTime uint64
targetIndexNode = -1
)
for i, node := range nodes {
alexvanin marked this conversation as resolved Outdated

Can we synchronize this implementation with s3-gw?

We change this code anyway.

Can we synchronize this implementation with [s3-gw](https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/src/commit/34c1426b9f1d67fb7d39f597ca9ddcff840da888/pkg/service/tree/tree.go#L837)? We change this code anyway.

I think yes, they are quite similar

I think yes, they are quite similar
currentCreationTime := node.GetTimestamp()
if checkExistOID(node.GetMeta()) && currentCreationTime > maxCreationTime {
maxCreationTime = currentCreationTime
if !checkExistOID(node.GetMeta()) {
continue
}
if currentCreationTime := getMaxTimestamp(node); currentCreationTime > maxCreationTime {
targetIndexNode = i
maxCreationTime = currentCreationTime
}
}
@ -187,3 +203,145 @@ func checkExistOID(meta []Meta) bool {
func pathFromName(objectName string) []string {
return strings.Split(objectName, separator)
}
func (c *Tree) GetSubTreeByPrefix(ctx context.Context, bktInfo *data.BucketInfo, prefix string, latestOnly bool) ([]NodeResponse, string, error) {
rootID, tailPrefix, err := c.determinePrefixNode(ctx, bktInfo, versionTree, prefix)
if err != nil {
return nil, "", err
}
subTree, err := c.service.GetSubTree(ctx, bktInfo, versionTree, rootID, 2, false)
if err != nil {
if errors.Is(err, layer.ErrNodeNotFound) {
return nil, "", nil
}
return nil, "", err
}
nodesMap := make(map[string][]NodeResponse, len(subTree))
for _, node := range subTree {
if MultiID(rootID).Equal(node.GetNodeID()) {
continue
}
fileName := GetFilename(node)
if !strings.HasPrefix(fileName, tailPrefix) {
continue
}
nodes := nodesMap[fileName]
// Add all nodes if flag latestOnly is false.
// Add all intermediate nodes
// and only latest leaf (object) nodes. To do this store and replace last leaf (object) node in nodes[0]
if len(nodes) == 0 {
nodes = []NodeResponse{node}
} else if !latestOnly || isIntermediate(node) {
nodes = append(nodes, node)
} else if isIntermediate(nodes[0]) {
nodes = append([]NodeResponse{node}, nodes...)
} else if getMaxTimestamp(node) > getMaxTimestamp(nodes[0]) {
nodes[0] = node
}
nodesMap[fileName] = nodes
}
result := make([]NodeResponse, 0, len(subTree))
for _, nodes := range nodesMap {
result = append(result, nodes...)
}
return result, strings.TrimSuffix(prefix, tailPrefix), nil
}
func (c *Tree) determinePrefixNode(ctx context.Context, bktInfo *data.BucketInfo, treeID, prefix string) ([]uint64, string, error) {
rootID := []uint64{0}
path := strings.Split(prefix, separator)
tailPrefix := path[len(path)-1]
if len(path) > 1 {
var err error
rootID, err = c.getPrefixNodeID(ctx, bktInfo, treeID, path[:len(path)-1])
if err != nil {
return nil, "", err
}
}
return rootID, tailPrefix, nil
}
func (c *Tree) getPrefixNodeID(ctx context.Context, bktInfo *data.BucketInfo, treeID string, prefixPath []string) ([]uint64, error) {
p := &GetNodesParams{
CnrID: bktInfo.CID,
BktInfo: bktInfo,
TreeID: treeID,
Path: prefixPath,
LatestOnly: false,
AllAttrs: true,
}
nodes, err := c.service.GetNodes(ctx, p)
if err != nil {
return nil, err
}
var intermediateNodes []uint64
for _, node := range nodes {
if isIntermediate(node) {
intermediateNodes = append(intermediateNodes, node.GetNodeID()...)
}
}
if len(intermediateNodes) == 0 {
return nil, layer.ErrNodeNotFound
alexvanin marked this conversation as resolved Outdated

I wonder, is it affected by an issue from TrueCloudLab/frostfs-s3-gw#488?

Should we adopt these s3 gateway changes here or it doesn't matter here?

I wonder, is it affected by an issue from https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pulls/488? Should we adopt these s3 gateway changes here or it doesn't matter here?

I think it's okay because we don't extract err code from layer.error. Instead of that we check error and write to response a new one

if err != nil {

I think it's okay because we don't extract err code from layer.error. Instead of that we check error and write to response a new one https://git.frostfs.info/TrueCloudLab/frostfs-http-gw/src/commit/5e24713418c296261689e8da497e173cf8385a45/internal/handler/handler.go#L236
}
return intermediateNodes, nil
}
func GetFilename(node NodeResponse) string {
for _, kv := range node.GetMeta() {
if kv.GetKey() == FileNameKey {
return string(kv.GetValue())
}
}
return ""
}
func isIntermediate(node NodeResponse) bool {
if len(node.GetMeta()) != 1 {
return false
}
return node.GetMeta()[0].GetKey() == FileNameKey
}
func getMaxTimestamp(node NodeResponse) uint64 {
var maxTimestamp uint64
for _, timestamp := range node.GetTimestamp() {
if timestamp > maxTimestamp {
maxTimestamp = timestamp
}
}
return maxTimestamp
}
type MultiID []uint64
func (m MultiID) Equal(id MultiID) bool {
seen := make(map[uint64]struct{}, len(m))
for i := range m {
seen[m[i]] = struct{}{}
}
for i := range id {
if _, ok := seen[id[i]]; !ok {
return false
}
}
return true
}

View file

@ -24,8 +24,8 @@ type nodeResponse struct {
timestamp uint64
}
func (n nodeResponse) GetTimestamp() uint64 {
return n.timestamp
func (n nodeResponse) GetTimestamp() []uint64 {
return []uint64{n.timestamp}
}
func (n nodeResponse) GetMeta() []Meta {
@ -36,6 +36,13 @@ func (n nodeResponse) GetMeta() []Meta {
return res
}
func (n nodeResponse) GetNodeID() []uint64 {
return nil
}
func (n nodeResponse) GetParentID() []uint64 {

Please add empty line between methods

Please add empty line between methods

Still there isn't empty line

Still there isn't empty line
return nil
}
func TestGetLatestNode(t *testing.T) {
for _, tc := range []struct {
name string
@ -130,7 +137,7 @@ func TestGetLatestNode(t *testing.T) {
},
} {
t.Run(tc.name, func(t *testing.T) {
actualNode, err := getLatestNode(tc.nodes)
actualNode, err := getLatestVersionNode(tc.nodes)
if tc.error {
require.Error(t, err)
return