[#369] Modify data type detector

Signed-off-by: Nikita Zinkevich <n.zinkevich@yadro.com>
This commit is contained in:
Nikita Zinkevich 2024-08-23 11:48:14 +03:00
parent a5b38537e6
commit 2b2de00bd8
6 changed files with 50 additions and 52 deletions

View file

@ -12,6 +12,7 @@ import (
"fmt"
"io"
"mime"
"net/http"
"path/filepath"
"strconv"
"strings"
@ -21,6 +22,7 @@ import (
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/data"
apiErrors "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/errors"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client"
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
@ -237,11 +239,11 @@ func (n *Layer) PutObject(ctx context.Context, p *PutObjectParams) (*data.Extend
if r != nil {
if len(p.Header[api.ContentType]) == 0 {
if contentType := MimeByFilePath(p.Object); len(contentType) == 0 {
d := newDetector(r)
d := detector.NewDetector(r, http.DetectContentType)
if contentType, err := d.Detect(); err == nil {
p.Header[api.ContentType] = contentType
}
r = d.MultiReader()
r = d.RestoredReader()
} else {
p.Header[api.ContentType] = contentType
}

View file

@ -9,6 +9,7 @@ import (
"net/url"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
@ -211,6 +212,7 @@ func (f *fileLogger) newLoggerConfig() zap.Config {
c.EncoderConfig.LevelKey = zapcore.OmitKey
c.EncoderConfig.TimeKey = zapcore.OmitKey
c.EncoderConfig.FunctionKey = zapcore.OmitKey
c.Sampling = nil
return c
}
@ -231,16 +233,13 @@ func getBody(httpBody io.ReadCloser, l *zap.Logger) []byte {
// processBody reads body and base64 encode it if it's not XML.
func processBody(bodyReader io.Reader) ([]byte, error) {
resultBody := &bytes.Buffer{}
isXML, checkedBytes, err := utils.DetectXML(bodyReader)
detect := detector.NewDetector(bodyReader, utils.DetectXML)
dataType, err := detect.Detect()
if err != nil {
return nil, err
}
writer := utils.ChooseWriter(isXML, resultBody)
_, err = writer.Write(checkedBytes)
if err != nil {
return nil, err
}
if _, err = io.Copy(writer, bodyReader); err != nil {
writer := utils.ChooseWriter(dataType, resultBody)
if _, err = io.Copy(writer, detect.RestoredReader()); err != nil {
return nil, err
}
if err = writer.Close(); err != nil {

View file

@ -18,6 +18,7 @@ import (
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
"github.com/spf13/cobra"
@ -62,18 +63,15 @@ func logResponse(cmd *cobra.Command, r *http.Request, resp *http.Response) {
if resp.ContentLength == 0 {
return
}
isXML, checkBuf, err := utils.DetectXML(resp.Body)
detect := detector.NewDetector(resp.Body, utils.DetectXML)
dataType, err := detect.Detect()
if err != nil {
cmd.Println(err.Error())
return
}
body := &bytes.Buffer{}
resultWriter := utils.ChooseWriter(isXML, body)
if _, err = resultWriter.Write(checkBuf); err != nil {
cmd.Println(err)
return
}
_, err = io.Copy(resultWriter, io.LimitReader(resp.Body, viper.GetInt64(printResponseLimit)))
resultWriter := utils.ChooseWriter(dataType, body)
_, err = io.Copy(resultWriter, io.LimitReader(detect.RestoredReader(), viper.GetInt64(printResponseLimit)))
if err != nil {
cmd.Println(err)
return

View file

@ -1,15 +1,15 @@
package layer
package detector
import (
"io"
"net/http"
)
type (
detector struct {
Detector struct {
io.Reader
err error
data []byte
err error
data []byte
detectFunc func([]byte) string
}
errReader struct {
data []byte
@ -36,23 +36,24 @@ func (r *errReader) Read(b []byte) (int, error) {
return n, nil
}
func newDetector(reader io.Reader) *detector {
return &detector{
data: make([]byte, contentTypeDetectSize),
Reader: reader,
func NewDetector(reader io.Reader, detectFunc func([]byte) string) *Detector {
return &Detector{
data: make([]byte, contentTypeDetectSize),
Reader: reader,
detectFunc: detectFunc,
}
}
func (d *detector) Detect() (string, error) {
func (d *Detector) Detect() (string, error) {
n, err := d.Reader.Read(d.data)
if err != nil && err != io.EOF {
d.err = err
return "", err
}
d.data = d.data[:n]
return http.DetectContentType(d.data), nil
return d.detectFunc(d.data), nil
}
func (d *detector) MultiReader() io.Reader {
func (d *Detector) RestoredReader() io.Reader {
return io.MultiReader(newReader(d.data, d.err), d.Reader)
}

View file

@ -14,6 +14,7 @@ import (
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
"github.com/aws/aws-sdk-go-v2/credentials"
@ -49,11 +50,12 @@ func (h *httpBody) UnmarshalJSON(data []byte) error {
if err != nil {
return fmt.Errorf("failed to unquote data: %w", err)
}
isXML, _, err := utils.DetectXML(strings.NewReader(unquoted))
detect := detector.NewDetector(strings.NewReader(unquoted), utils.DetectXML)
dataType, err := detect.Detect()
if err != nil {
return fmt.Errorf("failed to detect httpbody type: %w", err)
return fmt.Errorf("failed to detect data: %w", err)
}
reader := utils.ChooseReader(isXML, strings.NewReader(unquoted))
reader := utils.ChooseReader(dataType, detect.RestoredReader())
*h, err = io.ReadAll(reader)
if err != nil {
return fmt.Errorf("failed to unmarshal httpbody: %w", err)

View file

@ -4,7 +4,6 @@ import (
"bytes"
"encoding/base64"
"encoding/xml"
"errors"
"io"
)
@ -16,38 +15,35 @@ func (b nopCloseWriter) Close() error {
return nil
}
// Data type labels returned by DetectXML and compared against by
// ChooseWriter and ChooseReader.
const (
	// nonXML marks data whose first token is not XML markup.
	nonXML = "nonXML"
	// typeXML marks data that starts with an XML element or processing
	// instruction.
	typeXML = "application/XML"
)

// DetectXML reports whether data looks like an XML document by decoding its
// first raw token.
//
// It returns typeXML when that token is a start element or a processing
// instruction (for example an <?xml ...?> declaration). Every other outcome
// yields nonXML: a different token kind such as character data, empty input,
// or any decoding error. The signature matches the detect function expected
// by detector.NewDetector.
func DetectXML(data []byte) string {
	token, err := xml.NewDecoder(bytes.NewReader(data)).RawToken()
	if err != nil {
		// Syntax errors and EOF both mean the data cannot be treated as XML.
		return nonXML
	}

	switch token.(type) {
	case xml.StartElement, xml.ProcInst:
		return typeXML
	}

	return nonXML
}
func ChooseWriter(isXML bool, bodyWriter io.Writer) io.WriteCloser {
func ChooseWriter(dataType string, bodyWriter io.Writer) io.WriteCloser {
writeCloser := nopCloseWriter{bodyWriter}
if !isXML {
return base64.NewEncoder(base64.StdEncoding, bodyWriter)
if dataType == typeXML {
return writeCloser
}
return writeCloser
return base64.NewEncoder(base64.StdEncoding, bodyWriter)
}
func ChooseReader(isXML bool, bodyReader io.Reader) io.Reader {
if !isXML {
return base64.NewDecoder(base64.StdEncoding, bodyReader)
func ChooseReader(dataType string, bodyReader io.Reader) io.Reader {
if dataType == typeXML {
return bodyReader
}
return bodyReader
return base64.NewDecoder(base64.StdEncoding, bodyReader)
}