[#369] Modify data type detector
All checks were successful
/ DCO (pull_request) Successful in 1m23s
/ Vulncheck (pull_request) Successful in 1m30s
/ Builds (1.22) (pull_request) Successful in 1m35s
/ Builds (1.23) (pull_request) Successful in 1m42s
/ Lint (pull_request) Successful in 2m4s
/ Tests (1.22) (pull_request) Successful in 1m52s
/ Tests (1.23) (pull_request) Successful in 1m55s

Signed-off-by: Nikita Zinkevich <n.zinkevich@yadro.com>
This commit is contained in:
Nikita Zinkevich 2024-08-23 11:48:14 +03:00
parent 490a9ed95e
commit d3832af5ba
9 changed files with 51 additions and 58 deletions

View file

@ -12,6 +12,7 @@ import (
"fmt"
"io"
"mime"
"net/http"
"path/filepath"
"strconv"
"strings"
@ -21,6 +22,7 @@ import (
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/data"
apiErrors "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/errors"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client"
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
@ -245,11 +247,11 @@ func (n *Layer) PutObject(ctx context.Context, p *PutObjectParams) (*data.Extend
if r != nil {
if len(p.Header[api.ContentType]) == 0 {
if contentType := MimeByFilePath(p.Object); len(contentType) == 0 {
d := newDetector(r)
d := detector.NewDetector(r, http.DetectContentType)
if contentType, err := d.Detect(); err == nil {
p.Header[api.ContentType] = contentType
}
r = d.MultiReader()
r = d.RestoredReader()
} else {
p.Header[api.ContentType] = contentType
}

View file

@ -9,6 +9,7 @@ import (
"net/url"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
@ -211,6 +212,7 @@ func (f *fileLogger) newLoggerConfig() zap.Config {
c.EncoderConfig.LevelKey = zapcore.OmitKey
c.EncoderConfig.TimeKey = zapcore.OmitKey
c.EncoderConfig.FunctionKey = zapcore.OmitKey
c.Sampling = nil
return c
}
@ -231,16 +233,13 @@ func getBody(httpBody io.ReadCloser, l *zap.Logger) []byte {
// processBody reads body and base64 encode it if it's not XML.
func processBody(bodyReader io.Reader) ([]byte, error) {
resultBody := &bytes.Buffer{}
isXML, checkedBytes, err := utils.DetectXML(bodyReader)
detect := detector.NewDetector(bodyReader, utils.DetectXML)
dataType, err := detect.Detect()
if err != nil {
return nil, err
}
writer := utils.ChooseWriter(isXML, resultBody)
_, err = writer.Write(checkedBytes)
if err != nil {
return nil, err
}
if _, err = io.Copy(writer, bodyReader); err != nil {
writer := utils.ChooseWriter(dataType, resultBody)
if _, err = io.Copy(writer, detect.RestoredReader()); err != nil {
return nil, err
}
if err = writer.Close(); err != nil {

View file

@ -589,9 +589,6 @@ func newMaxClients(cfg *viper.Viper) maxClientsConfig {
}
func (s *appSettings) updateHTTPLoggingSettings(cfg *viper.Viper, log *zap.Logger) {
s.mu.Lock()
defer s.mu.Unlock()
s.httpLogging.Enabled = cfg.GetBool(cfgHTTPLoggingEnabled)
s.httpLogging.MaxBody = cfg.GetInt64(cfgHTTPLoggingMaxBody)
s.httpLogging.MaxLogSize = cfg.GetInt(cfgHTTPLoggingMaxLogSize)

View file

@ -18,6 +18,7 @@ import (
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
"github.com/spf13/cobra"
@ -62,18 +63,15 @@ func logResponse(cmd *cobra.Command, r *http.Request, resp *http.Response) {
if resp.ContentLength == 0 {
return
}
isXML, checkBuf, err := utils.DetectXML(resp.Body)
detect := detector.NewDetector(resp.Body, utils.DetectXML)
dataType, err := detect.Detect()
if err != nil {
cmd.Println(err.Error())
return
}
body := &bytes.Buffer{}
resultWriter := utils.ChooseWriter(isXML, body)
if _, err = resultWriter.Write(checkBuf); err != nil {
cmd.Println(err)
return
}
_, err = io.Copy(resultWriter, io.LimitReader(resp.Body, viper.GetInt64(printResponseLimit)))
resultWriter := utils.ChooseWriter(dataType, body)
_, err = io.Copy(resultWriter, io.LimitReader(detect.RestoredReader(), viper.GetInt64(printResponseLimit)))
if err != nil {
cmd.Println(err)
return

View file

@ -57,7 +57,7 @@ S3_GW_HTTP_LOGGING_ENABLED=false
# max body size to log
S3_GW_HTTP_LOGGING_MAX_BODY=1024
# max log size in Mb
S3_GW_HTTP_LOGGING_MAX_LOG_SIZE: 20
S3_GW_HTTP_LOGGING_MAX_LOG_SIZE=20
# use log compression
S3_GW_HTTP_LOGGING_GZIP=true
# possible destination output values: filesystem path, url, "stdout", "stderr"

2
go.sum
View file

@ -179,8 +179,6 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=

View file

@ -1,15 +1,15 @@
package layer
package detector
import (
"io"
"net/http"
)
type (
detector struct {
Detector struct {
io.Reader
err error
data []byte
detectFunc func([]byte) string
}
errReader struct {
data []byte
@ -36,23 +36,24 @@ func (r *errReader) Read(b []byte) (int, error) {
return n, nil
}
func newDetector(reader io.Reader) *detector {
return &detector{
// NewDetector builds a Detector that reads from reader and classifies the
// leading bytes with detectFunc. The internal buffer is allocated with
// contentTypeDetectSize bytes, which bounds how much data Detect inspects.
func NewDetector(reader io.Reader, detectFunc func([]byte) string) *Detector {
return &Detector{
data: make([]byte, contentTypeDetectSize),
Reader: reader,
detectFunc: detectFunc,
}
}
func (d *detector) Detect() (string, error) {
// Detect fills the internal buffer with a single Read call on the underlying
// reader and returns detectFunc's verdict on the bytes obtained. io.EOF is
// not treated as a failure; any other read error is stored in d.err and
// returned with an empty result.
// NOTE(review): a single Read may return fewer bytes than are available, so
// detection can run on a short prefix — consider io.ReadFull; confirm.
func (d *Detector) Detect() (string, error) {
n, err := d.Reader.Read(d.data)
if err != nil && err != io.EOF {
// NOTE(review): d.data is left at its full length on this path even though
// only n bytes were read — verify how newReader/errReader replays it.
d.err = err
return "", err
}
// Keep only the bytes actually read so RestoredReader can replay them.
d.data = d.data[:n]
return http.DetectContentType(d.data), nil
return d.detectFunc(d.data), nil
}
func (d *detector) MultiReader() io.Reader {
// RestoredReader returns a reader that first yields what newReader produces
// from the buffered bytes and the stored read error, and then continues with
// the remainder of the underlying reader. It is meant to be called after
// Detect so that the consumer sees the stream from its beginning.
func (d *Detector) RestoredReader() io.Reader {
return io.MultiReader(newReader(d.data, d.err), d.Reader)
}

View file

@ -14,6 +14,7 @@ import (
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
"github.com/aws/aws-sdk-go-v2/credentials"
@ -49,11 +50,12 @@ func (h *httpBody) UnmarshalJSON(data []byte) error {
if err != nil {
return fmt.Errorf("failed to unquote data: %w", err)
}
isXML, _, err := utils.DetectXML(strings.NewReader(unquoted))
detect := detector.NewDetector(strings.NewReader(unquoted), utils.DetectXML)
dataType, err := detect.Detect()
if err != nil {
return fmt.Errorf("failed to detect httpbody type: %w", err)
return fmt.Errorf("failed to detect data: %w", err)
}
reader := utils.ChooseReader(isXML, strings.NewReader(unquoted))
reader := utils.ChooseReader(dataType, detect.RestoredReader())
*h, err = io.ReadAll(reader)
if err != nil {
return fmt.Errorf("failed to unmarshal httpbody: %w", err)

View file

@ -4,7 +4,6 @@ import (
"bytes"
"encoding/base64"
"encoding/xml"
"errors"
"io"
)
@ -16,38 +15,35 @@ func (b nopCloseWriter) Close() error {
return nil
}
const BodyRecognizeLimit int64 = 128
// Data type labels returned by DetectXML and compared against by
// ChooseWriter and ChooseReader.
const (
nonXML = "nonXML"
typeXML = "application/XML"
)
func DetectXML(reader io.Reader) (bool, []byte, error) {
detectBuf := bytes.NewBuffer(nil)
detectReader := io.TeeReader(io.LimitReader(reader, BodyRecognizeLimit), detectBuf)
token, err := xml.NewDecoder(detectReader).RawToken()
// DetectXML classifies data by decoding its first raw XML token. It returns
// typeXML when that token is a start element or a processing instruction,
// and nonXML otherwise, including when the decoder reports any error.
func DetectXML(data []byte) string {
token, err := xml.NewDecoder(bytes.NewReader(data)).RawToken()
if err != nil {
var xmlErr *xml.SyntaxError
if errors.Is(err, io.EOF) || errors.As(err, &xmlErr) {
return false, detectBuf.Bytes(), nil
}
return false, detectBuf.Bytes(), err
return nonXML
}
// Only an element or a processing instruction as the first token counts as XML.
switch token.(type) {
case xml.StartElement, xml.ProcInst:
return true, detectBuf.Bytes(), nil
return typeXML
}
return false, detectBuf.Bytes(), nil
return nonXML
}
func ChooseWriter(isXML bool, bodyWriter io.Writer) io.WriteCloser {
// ChooseWriter picks how a body is written to bodyWriter. For typeXML the
// data is passed through unchanged via a wrapper whose Close does nothing;
// for any other dataType it is base64-encoded with the standard alphabet.
// Callers must Close the result so the base64 encoder flushes its final
// partial block.
func ChooseWriter(dataType string, bodyWriter io.Writer) io.WriteCloser {
writeCloser := nopCloseWriter{bodyWriter}
if !isXML {
return base64.NewEncoder(base64.StdEncoding, bodyWriter)
}
if dataType == typeXML {
return writeCloser
}
return base64.NewEncoder(base64.StdEncoding, bodyWriter)
}
func ChooseReader(isXML bool, bodyReader io.Reader) io.Reader {
if !isXML {
return base64.NewDecoder(base64.StdEncoding, bodyReader)
}
// ChooseReader is the read-side counterpart of ChooseWriter. For typeXML it
// returns bodyReader as is; for any other dataType it wraps bodyReader in a
// standard-alphabet base64 decoder.
func ChooseReader(dataType string, bodyReader io.Reader) io.Reader {
if dataType == typeXML {
return bodyReader
}
return base64.NewDecoder(base64.StdEncoding, bodyReader)
}