[#369] Modify data type detector

Signed-off-by: Nikita Zinkevich <n.zinkevich@yadro.com>
This commit is contained in:
Nikita Zinkevich 2024-08-23 11:48:14 +03:00 committed by Alex Vanin
parent 4177898f97
commit 4ff049f480
6 changed files with 50 additions and 52 deletions

View file

@ -12,6 +12,7 @@ import (
"fmt" "fmt"
"io" "io"
"mime" "mime"
"net/http"
"path/filepath" "path/filepath"
"strconv" "strconv"
"strings" "strings"
@ -21,6 +22,7 @@ import (
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/data" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/data"
apiErrors "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/errors" apiErrors "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/errors"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client"
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
@ -237,11 +239,11 @@ func (n *layer) PutObject(ctx context.Context, p *PutObjectParams) (*data.Extend
if r != nil { if r != nil {
if len(p.Header[api.ContentType]) == 0 { if len(p.Header[api.ContentType]) == 0 {
if contentType := MimeByFilePath(p.Object); len(contentType) == 0 { if contentType := MimeByFilePath(p.Object); len(contentType) == 0 {
d := newDetector(r) d := detector.NewDetector(r, http.DetectContentType)
if contentType, err := d.Detect(); err == nil { if contentType, err := d.Detect(); err == nil {
p.Header[api.ContentType] = contentType p.Header[api.ContentType] = contentType
} }
r = d.MultiReader() r = d.RestoredReader()
} else { } else {
p.Header[api.ContentType] = contentType p.Header[api.ContentType] = contentType
} }

View file

@ -9,6 +9,7 @@ import (
"net/url" "net/url"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
"go.uber.org/zap" "go.uber.org/zap"
"go.uber.org/zap/zapcore" "go.uber.org/zap/zapcore"
@ -211,6 +212,7 @@ func (f *fileLogger) newLoggerConfig() zap.Config {
c.EncoderConfig.LevelKey = zapcore.OmitKey c.EncoderConfig.LevelKey = zapcore.OmitKey
c.EncoderConfig.TimeKey = zapcore.OmitKey c.EncoderConfig.TimeKey = zapcore.OmitKey
c.EncoderConfig.FunctionKey = zapcore.OmitKey c.EncoderConfig.FunctionKey = zapcore.OmitKey
c.Sampling = nil
return c return c
} }
@ -231,16 +233,13 @@ func getBody(httpBody io.ReadCloser, l *zap.Logger) []byte {
// processBody reads body and base64 encode it if it's not XML. // processBody reads body and base64 encode it if it's not XML.
func processBody(bodyReader io.Reader) ([]byte, error) { func processBody(bodyReader io.Reader) ([]byte, error) {
resultBody := &bytes.Buffer{} resultBody := &bytes.Buffer{}
isXML, checkedBytes, err := utils.DetectXML(bodyReader) detect := detector.NewDetector(bodyReader, utils.DetectXML)
dataType, err := detect.Detect()
if err != nil { if err != nil {
return nil, err return nil, err
} }
writer := utils.ChooseWriter(isXML, resultBody) writer := utils.ChooseWriter(dataType, resultBody)
_, err = writer.Write(checkedBytes) if _, err = io.Copy(writer, detect.RestoredReader()); err != nil {
if err != nil {
return nil, err
}
if _, err = io.Copy(writer, bodyReader); err != nil {
return nil, err return nil, err
} }
if err = writer.Close(); err != nil { if err = writer.Close(); err != nil {

View file

@ -18,6 +18,7 @@ import (
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
"github.com/spf13/cobra" "github.com/spf13/cobra"
@ -62,18 +63,15 @@ func logResponse(cmd *cobra.Command, r *http.Request, resp *http.Response) {
if resp.ContentLength == 0 { if resp.ContentLength == 0 {
return return
} }
isXML, checkBuf, err := utils.DetectXML(resp.Body) detect := detector.NewDetector(resp.Body, utils.DetectXML)
dataType, err := detect.Detect()
if err != nil { if err != nil {
cmd.Println(err.Error()) cmd.Println(err.Error())
return return
} }
body := &bytes.Buffer{} body := &bytes.Buffer{}
resultWriter := utils.ChooseWriter(isXML, body) resultWriter := utils.ChooseWriter(dataType, body)
if _, err = resultWriter.Write(checkBuf); err != nil { _, err = io.Copy(resultWriter, io.LimitReader(detect.RestoredReader(), viper.GetInt64(printResponseLimit)))
cmd.Println(err)
return
}
_, err = io.Copy(resultWriter, io.LimitReader(resp.Body, viper.GetInt64(printResponseLimit)))
if err != nil { if err != nil {
cmd.Println(err) cmd.Println(err)
return return

View file

@ -1,15 +1,15 @@
package layer package detector
import ( import (
"io" "io"
"net/http"
) )
type ( type (
// Detector (renamed from the unexported detector in this change) embeds the
// source io.Reader and keeps the state produced by Detect: the sampled bytes
// (data), the read error, if any (err), and the classification callback
// (detectFunc) that turns the sampled bytes into a type string.
detector struct { Detector struct {
io.Reader io.Reader
err error err error
data []byte data []byte
detectFunc func([]byte) string
}
errReader struct { errReader struct {
data []byte data []byte
@ -36,23 +36,24 @@ func (r *errReader) Read(b []byte) (int, error) {
return n, nil return n, nil
} }
// NewDetector returns a Detector that samples from reader and classifies the
// sample with detectFunc. The sample buffer is allocated up front with
// contentTypeDetectSize bytes (constant declared outside this hunk).
// Before this change the constructor was unexported and took only the reader.
func newDetector(reader io.Reader) *detector { func NewDetector(reader io.Reader, detectFunc func([]byte) string) *Detector {
return &detector{ return &Detector{
data: make([]byte, contentTypeDetectSize), data: make([]byte, contentTypeDetectSize),
Reader: reader, Reader: reader,
detectFunc: detectFunc,
}
}
// Detect reads a sample from the underlying Reader and returns the result of
// detectFunc applied to it (previously the result of http.DetectContentType).
// io.EOF is not treated as a failure; any other read error is remembered in
// d.err and returned with an empty type string.
func (d *detector) Detect() (string, error) { func (d *Detector) Detect() (string, error) {
// Exactly one Read call is made. NOTE(review): the io.Reader contract allows
// Read to return fewer bytes than len(d.data) even when more are available,
// so the sample may be shorter than the buffer — confirm this is acceptable.
n, err := d.Reader.Read(d.data)
if err != nil && err != io.EOF {
// NOTE(review): d.data is not truncated to n on this path, so it keeps its
// full allocated length when RestoredReader is called afterwards — verify
// how newReader handles that.
d.err = err d.err = err
return "", err return "", err
} }
// Keep only the bytes actually read so they can be replayed later.
d.data = d.data[:n] d.data = d.data[:n]
return http.DetectContentType(d.data), nil return d.detectFunc(d.data), nil
}
// RestoredReader (renamed from MultiReader) returns a reader that first yields
// newReader(d.data, d.err) and then continues with the underlying Reader, so
// the bytes consumed by Detect are seen again by the caller.
// newReader is defined outside this hunk; presumably it replays data and then
// surfaces err — confirm against its definition.
func (d *detector) MultiReader() io.Reader { func (d *Detector) RestoredReader() io.Reader {
return io.MultiReader(newReader(d.data, d.err), d.Reader) return io.MultiReader(newReader(d.data, d.err), d.Reader)
}

View file

@ -14,6 +14,7 @@ import (
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4" v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
"github.com/aws/aws-sdk-go-v2/credentials" "github.com/aws/aws-sdk-go-v2/credentials"
@ -49,11 +50,12 @@ func (h *httpBody) UnmarshalJSON(data []byte) error {
if err != nil { if err != nil {
return fmt.Errorf("failed to unquote data: %w", err) return fmt.Errorf("failed to unquote data: %w", err)
} }
isXML, _, err := utils.DetectXML(strings.NewReader(unquoted)) detect := detector.NewDetector(strings.NewReader(unquoted), utils.DetectXML)
dataType, err := detect.Detect()
if err != nil { if err != nil {
return fmt.Errorf("failed to detect httpbody type: %w", err) return fmt.Errorf("failed to detect data: %w", err)
} }
reader := utils.ChooseReader(isXML, strings.NewReader(unquoted)) reader := utils.ChooseReader(dataType, detect.RestoredReader())
*h, err = io.ReadAll(reader) *h, err = io.ReadAll(reader)
if err != nil { if err != nil {
return fmt.Errorf("failed to unmarshal httpbody: %w", err) return fmt.Errorf("failed to unmarshal httpbody: %w", err)

View file

@ -4,7 +4,6 @@ import (
"bytes" "bytes"
"encoding/base64" "encoding/base64"
"encoding/xml" "encoding/xml"
"errors"
"io" "io"
) )
@ -16,38 +15,35 @@ func (b nopCloseWriter) Close() error {
return nil return nil
} }
// Data-type labels returned by DetectXML and compared against in ChooseWriter
// and ChooseReader. This group replaces the former BodyRecognizeLimit constant.
const BodyRecognizeLimit int64 = 128 const (
nonXML = "nonXML"
typeXML = "application/XML"
)
// DetectXML (new signature) decodes the first raw XML token from data and
// returns typeXML when that token is a start element or a processing
// instruction. Any decoding error, or any other token kind, yields nonXML.
// The old version read from an io.Reader and also returned the consumed bytes
// and an error; buffering is no longer done here.
func DetectXML(reader io.Reader) (bool, []byte, error) { func DetectXML(data []byte) string {
detectBuf := bytes.NewBuffer(nil) token, err := xml.NewDecoder(bytes.NewReader(data)).RawToken()
detectReader := io.TeeReader(io.LimitReader(reader, BodyRecognizeLimit), detectBuf)
token, err := xml.NewDecoder(detectReader).RawToken()
if err != nil { if err != nil {
var xmlErr *xml.SyntaxError return nonXML
if errors.Is(err, io.EOF) || errors.As(err, &xmlErr) {
return false, detectBuf.Bytes(), nil
}
return false, detectBuf.Bytes(), err
}
switch token.(type) { switch token.(type) {
case xml.StartElement, xml.ProcInst: case xml.StartElement, xml.ProcInst:
return true, detectBuf.Bytes(), nil return typeXML
}
return false, detectBuf.Bytes(), nil return nonXML
}
// ChooseWriter (new signature) selects the writer by data-type label: for
// typeXML it returns bodyWriter wrapped in nopCloseWriter, for any other
// label it returns a base64 (StdEncoding) encoder writing to bodyWriter.
// The old version made the same choice from an isXML boolean.
func ChooseWriter(isXML bool, bodyWriter io.Writer) io.WriteCloser { func ChooseWriter(dataType string, bodyWriter io.Writer) io.WriteCloser {
writeCloser := nopCloseWriter{bodyWriter} writeCloser := nopCloseWriter{bodyWriter}
if !isXML { if dataType == typeXML {
return base64.NewEncoder(base64.StdEncoding, bodyWriter)
}
return writeCloser return writeCloser
} }
return base64.NewEncoder(base64.StdEncoding, bodyWriter)
func ChooseReader(isXML bool, bodyReader io.Reader) io.Reader {
if !isXML {
return base64.NewDecoder(base64.StdEncoding, bodyReader)
} }
// ChooseReader (new signature) is the reading counterpart: for typeXML it
// returns bodyReader unchanged, for any other label it returns a base64
// (StdEncoding) decoder reading from bodyReader.
func ChooseReader(dataType string, bodyReader io.Reader) io.Reader {
if dataType == typeXML {
return bodyReader return bodyReader
} }
return base64.NewDecoder(base64.StdEncoding, bodyReader)
}