From 05fec217d77f9a8e093fbe65c4814b57762c345d Mon Sep 17 00:00:00 2001 From: Nikita Zinkevich Date: Fri, 23 Aug 2024 11:48:14 +0300 Subject: [PATCH] [#369] Modify data type detector Signed-off-by: Nikita Zinkevich --- api/layer/object.go | 6 ++-- api/middleware/log_http.go | 13 ++++----- cmd/s3-playback/modules/run.go | 12 ++++---- {api/layer => pkg/detector}/detector.go | 25 ++++++++-------- playback/request.go | 8 ++++-- playback/utils/utils.go | 38 +++++++++++-------------- 6 files changed, 50 insertions(+), 52 deletions(-) rename {api/layer => pkg/detector}/detector.go (60%) diff --git a/api/layer/object.go b/api/layer/object.go index 8c4549d..4d3c92c 100644 --- a/api/layer/object.go +++ b/api/layer/object.go @@ -12,6 +12,7 @@ import ( "fmt" "io" "mime" + "net/http" "path/filepath" "strconv" "strings" @@ -21,6 +22,7 @@ import ( "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/data" apiErrors "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/errors" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs" + "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client" cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" @@ -245,11 +247,11 @@ func (n *Layer) PutObject(ctx context.Context, p *PutObjectParams) (*data.Extend if r != nil { if len(p.Header[api.ContentType]) == 0 { if contentType := MimeByFilePath(p.Object); len(contentType) == 0 { - d := newDetector(r) + d := detector.NewDetector(r, http.DetectContentType) if contentType, err := d.Detect(); err == nil { p.Header[api.ContentType] = contentType } - r = d.MultiReader() + r = d.RestoredReader() } else { p.Header[api.ContentType] = contentType } diff --git a/api/middleware/log_http.go b/api/middleware/log_http.go index 848e4c8..a32cef5 100644 --- a/api/middleware/log_http.go +++ b/api/middleware/log_http.go @@ -9,6 +9,7 @@ import ( "net/url" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs" + "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils" "go.uber.org/zap" "go.uber.org/zap/zapcore" @@ -211,6 +212,7 @@ func (f *fileLogger) newLoggerConfig() zap.Config { c.EncoderConfig.LevelKey = zapcore.OmitKey c.EncoderConfig.TimeKey = zapcore.OmitKey c.EncoderConfig.FunctionKey = zapcore.OmitKey + c.Sampling = nil return c } @@ -231,16 +233,13 @@ func getBody(httpBody io.ReadCloser, l *zap.Logger) []byte { // processBody reads body and base64 encode it if it's not XML. func processBody(bodyReader io.Reader) ([]byte, error) { resultBody := &bytes.Buffer{} - isXML, checkedBytes, err := utils.DetectXML(bodyReader) + detect := detector.NewDetector(bodyReader, utils.DetectXML) + dataType, err := detect.Detect() if err != nil { return nil, err } - writer := utils.ChooseWriter(isXML, resultBody) - _, err = writer.Write(checkedBytes) - if err != nil { - return nil, err - } - if _, err = io.Copy(writer, bodyReader); err != nil { + writer := utils.ChooseWriter(dataType, resultBody) + if _, err = io.Copy(writer, detect.RestoredReader()); err != nil { return nil, err } if err = writer.Close(); err != nil { diff --git a/cmd/s3-playback/modules/run.go b/cmd/s3-playback/modules/run.go index 57dcc3d..dabf4a0 100644 --- a/cmd/s3-playback/modules/run.go +++ b/cmd/s3-playback/modules/run.go @@ -18,6 +18,7 @@ import ( "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth" + "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils" "github.com/spf13/cobra" @@ -62,18 +63,15 @@ func logResponse(cmd *cobra.Command, r *http.Request, resp *http.Response) { if resp.ContentLength == 0 { return } - isXML, checkBuf, err := utils.DetectXML(resp.Body) + detect := detector.NewDetector(resp.Body, utils.DetectXML) + dataType, err := detect.Detect() if err != nil { cmd.Println(err.Error()) return } body := &bytes.Buffer{} - resultWriter := utils.ChooseWriter(isXML, body) - if _, err = resultWriter.Write(checkBuf); err != nil { - cmd.Println(err) - return - } - _, err = io.Copy(resultWriter, io.LimitReader(resp.Body, viper.GetInt64(printResponseLimit))) + resultWriter := utils.ChooseWriter(dataType, body) + _, err = io.Copy(resultWriter, io.LimitReader(detect.RestoredReader(), viper.GetInt64(printResponseLimit))) if err != nil { cmd.Println(err) return diff --git a/api/layer/detector.go b/pkg/detector/detector.go similarity index 60% rename from api/layer/detector.go rename to pkg/detector/detector.go index 81ec75b..972725b 100644 --- a/api/layer/detector.go +++ b/pkg/detector/detector.go @@ -1,15 +1,15 @@ -package layer +package detector import ( "io" - "net/http" ) type ( - detector struct { + Detector struct { io.Reader - err error - data []byte + err error + data []byte + detectFunc func([]byte) string } errReader struct { data []byte @@ -36,23 +36,24 @@ func (r *errReader) Read(b []byte) (int, error) { return n, nil } -func newDetector(reader io.Reader) *detector { - return &detector{ - data: make([]byte, contentTypeDetectSize), - Reader: reader, +func NewDetector(reader io.Reader, detectFunc func([]byte) string) *Detector { + return &Detector{ + data: make([]byte, contentTypeDetectSize), + Reader: reader, + detectFunc: detectFunc, } } -func (d *detector) Detect() (string, error) { +func (d *Detector) Detect() (string, error) { n, err := d.Reader.Read(d.data) if err != nil && err != io.EOF { d.err = err return "", err } d.data = d.data[:n] - return http.DetectContentType(d.data), nil + return d.detectFunc(d.data), nil } -func (d *detector) MultiReader() io.Reader { +func (d *Detector) RestoredReader() io.Reader { return io.MultiReader(newReader(d.data, d.err), d.Reader) } diff --git a/playback/request.go b/playback/request.go index ac9e577..7343066 100644 --- a/playback/request.go +++ b/playback/request.go @@ -14,6 +14,7 @@ import ( "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth" + "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector" "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils" v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4" "github.com/aws/aws-sdk-go-v2/credentials" @@ -49,11 +50,12 @@ func (h *httpBody) UnmarshalJSON(data []byte) error { if err != nil { return fmt.Errorf("failed to unquote data: %w", err) } - isXML, _, err := utils.DetectXML(strings.NewReader(unquoted)) + detect := detector.NewDetector(strings.NewReader(unquoted), utils.DetectXML) + dataType, err := detect.Detect() if err != nil { - return fmt.Errorf("failed to detect httpbody type: %w", err) + return fmt.Errorf("failed to detect data: %w", err) } - reader := utils.ChooseReader(isXML, strings.NewReader(unquoted)) + reader := utils.ChooseReader(dataType, detect.RestoredReader()) *h, err = io.ReadAll(reader) if err != nil { return fmt.Errorf("failed to unmarshal httpbody: %w", err) diff --git a/playback/utils/utils.go b/playback/utils/utils.go index 08347d3..b15f030 100644 --- a/playback/utils/utils.go +++ b/playback/utils/utils.go @@ -4,7 +4,6 @@ import ( "bytes" "encoding/base64" "encoding/xml" - "errors" "io" ) @@ -16,38 +15,35 @@ func (b nopCloseWriter) Close() error { return nil } -const BodyRecognizeLimit int64 = 128 +const ( + nonXML = "nonXML" + typeXML = "application/XML" +) -func DetectXML(reader io.Reader) (bool, []byte, error) { - detectBuf := bytes.NewBuffer(nil) - detectReader := io.TeeReader(io.LimitReader(reader, BodyRecognizeLimit), detectBuf) - token, err := xml.NewDecoder(detectReader).RawToken() +func DetectXML(data []byte) string { + token, err := xml.NewDecoder(bytes.NewReader(data)).RawToken() if err != nil { - var xmlErr *xml.SyntaxError - if errors.Is(err, io.EOF) || errors.As(err, &xmlErr) { - return false, detectBuf.Bytes(), nil - } - return false, detectBuf.Bytes(), err + return nonXML } switch token.(type) { case xml.StartElement, xml.ProcInst: - return true, detectBuf.Bytes(), nil + return typeXML } - return false, detectBuf.Bytes(), nil + return nonXML } -func ChooseWriter(isXML bool, bodyWriter io.Writer) io.WriteCloser { +func ChooseWriter(dataType string, bodyWriter io.Writer) io.WriteCloser { writeCloser := nopCloseWriter{bodyWriter} - if !isXML { - return base64.NewEncoder(base64.StdEncoding, bodyWriter) + if dataType == typeXML { + return writeCloser } - return writeCloser + return base64.NewEncoder(base64.StdEncoding, bodyWriter) } -func ChooseReader(isXML bool, bodyReader io.Reader) io.Reader { - if !isXML { - return base64.NewDecoder(base64.StdEncoding, bodyReader) +func ChooseReader(dataType string, bodyReader io.Reader) io.Reader { + if dataType == typeXML { + return bodyReader } - return bodyReader + return base64.NewDecoder(base64.StdEncoding, bodyReader) }