[#369] Modify data type detector
Signed-off-by: Nikita Zinkevich <n.zinkevich@yadro.com>
This commit is contained in:
parent
c03ff85fa4
commit
05fec217d7
6 changed files with 50 additions and 52 deletions
|
@ -12,6 +12,7 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"mime"
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
@ -21,6 +22,7 @@ import (
|
|||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/data"
|
||||
apiErrors "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/errors"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client"
|
||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
|
@ -245,11 +247,11 @@ func (n *Layer) PutObject(ctx context.Context, p *PutObjectParams) (*data.Extend
|
|||
if r != nil {
|
||||
if len(p.Header[api.ContentType]) == 0 {
|
||||
if contentType := MimeByFilePath(p.Object); len(contentType) == 0 {
|
||||
d := newDetector(r)
|
||||
d := detector.NewDetector(r, http.DetectContentType)
|
||||
if contentType, err := d.Detect(); err == nil {
|
||||
p.Header[api.ContentType] = contentType
|
||||
}
|
||||
r = d.MultiReader()
|
||||
r = d.RestoredReader()
|
||||
} else {
|
||||
p.Header[api.ContentType] = contentType
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
"net/url"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
|
||||
"go.uber.org/zap"
|
||||
"go.uber.org/zap/zapcore"
|
||||
|
@ -211,6 +212,7 @@ func (f *fileLogger) newLoggerConfig() zap.Config {
|
|||
c.EncoderConfig.LevelKey = zapcore.OmitKey
|
||||
c.EncoderConfig.TimeKey = zapcore.OmitKey
|
||||
c.EncoderConfig.FunctionKey = zapcore.OmitKey
|
||||
c.Sampling = nil
|
||||
|
||||
return c
|
||||
}
|
||||
|
@ -231,16 +233,13 @@ func getBody(httpBody io.ReadCloser, l *zap.Logger) []byte {
|
|||
// processBody reads the body and base64-encodes it if it is not XML.
|
||||
func processBody(bodyReader io.Reader) ([]byte, error) {
|
||||
resultBody := &bytes.Buffer{}
|
||||
isXML, checkedBytes, err := utils.DetectXML(bodyReader)
|
||||
detect := detector.NewDetector(bodyReader, utils.DetectXML)
|
||||
dataType, err := detect.Detect()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
writer := utils.ChooseWriter(isXML, resultBody)
|
||||
_, err = writer.Write(checkedBytes)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err = io.Copy(writer, bodyReader); err != nil {
|
||||
writer := utils.ChooseWriter(dataType, resultBody)
|
||||
if _, err = io.Copy(writer, detect.RestoredReader()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err = writer.Close(); err != nil {
|
||||
|
|
|
@ -18,6 +18,7 @@ import (
|
|||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
|
||||
"github.com/spf13/cobra"
|
||||
|
@ -62,18 +63,15 @@ func logResponse(cmd *cobra.Command, r *http.Request, resp *http.Response) {
|
|||
if resp.ContentLength == 0 {
|
||||
return
|
||||
}
|
||||
isXML, checkBuf, err := utils.DetectXML(resp.Body)
|
||||
detect := detector.NewDetector(resp.Body, utils.DetectXML)
|
||||
dataType, err := detect.Detect()
|
||||
if err != nil {
|
||||
cmd.Println(err.Error())
|
||||
return
|
||||
}
|
||||
body := &bytes.Buffer{}
|
||||
resultWriter := utils.ChooseWriter(isXML, body)
|
||||
if _, err = resultWriter.Write(checkBuf); err != nil {
|
||||
cmd.Println(err)
|
||||
return
|
||||
}
|
||||
_, err = io.Copy(resultWriter, io.LimitReader(resp.Body, viper.GetInt64(printResponseLimit)))
|
||||
resultWriter := utils.ChooseWriter(dataType, body)
|
||||
_, err = io.Copy(resultWriter, io.LimitReader(detect.RestoredReader(), viper.GetInt64(printResponseLimit)))
|
||||
if err != nil {
|
||||
cmd.Println(err)
|
||||
return
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
package layer
|
||||
package detector
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
type (
|
||||
detector struct {
|
||||
Detector struct {
|
||||
io.Reader
|
||||
err error
|
||||
data []byte
|
||||
detectFunc func([]byte) string
|
||||
}
|
||||
errReader struct {
|
||||
data []byte
|
||||
|
@ -36,23 +36,24 @@ func (r *errReader) Read(b []byte) (int, error) {
|
|||
return n, nil
|
||||
}
|
||||
|
||||
func newDetector(reader io.Reader) *detector {
|
||||
return &detector{
|
||||
func NewDetector(reader io.Reader, detectFunc func([]byte) string) *Detector {
|
||||
return &Detector{
|
||||
data: make([]byte, contentTypeDetectSize),
|
||||
Reader: reader,
|
||||
detectFunc: detectFunc,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *detector) Detect() (string, error) {
|
||||
func (d *Detector) Detect() (string, error) {
|
||||
n, err := d.Reader.Read(d.data)
|
||||
if err != nil && err != io.EOF {
|
||||
d.err = err
|
||||
return "", err
|
||||
}
|
||||
d.data = d.data[:n]
|
||||
return http.DetectContentType(d.data), nil
|
||||
return d.detectFunc(d.data), nil
|
||||
}
|
||||
|
||||
func (d *detector) MultiReader() io.Reader {
|
||||
func (d *Detector) RestoredReader() io.Reader {
|
||||
return io.MultiReader(newReader(d.data, d.err), d.Reader)
|
||||
}
|
|
@ -14,6 +14,7 @@ import (
|
|||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
|
||||
v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
|
||||
"github.com/aws/aws-sdk-go-v2/credentials"
|
||||
|
@ -49,11 +50,12 @@ func (h *httpBody) UnmarshalJSON(data []byte) error {
|
|||
if err != nil {
|
||||
return fmt.Errorf("failed to unquote data: %w", err)
|
||||
}
|
||||
isXML, _, err := utils.DetectXML(strings.NewReader(unquoted))
|
||||
detect := detector.NewDetector(strings.NewReader(unquoted), utils.DetectXML)
|
||||
dataType, err := detect.Detect()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to detect httpbody type: %w", err)
|
||||
return fmt.Errorf("failed to detect data: %w", err)
|
||||
}
|
||||
reader := utils.ChooseReader(isXML, strings.NewReader(unquoted))
|
||||
reader := utils.ChooseReader(dataType, detect.RestoredReader())
|
||||
*h, err = io.ReadAll(reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to unmarshal httpbody: %w", err)
|
||||
|
|
|
@ -4,7 +4,6 @@ import (
|
|||
"bytes"
|
||||
"encoding/base64"
|
||||
"encoding/xml"
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
|
@ -16,38 +15,35 @@ func (b nopCloseWriter) Close() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
const BodyRecognizeLimit int64 = 128
|
||||
const (
|
||||
nonXML = "nonXML"
|
||||
typeXML = "application/XML"
|
||||
)
|
||||
|
||||
func DetectXML(reader io.Reader) (bool, []byte, error) {
|
||||
detectBuf := bytes.NewBuffer(nil)
|
||||
detectReader := io.TeeReader(io.LimitReader(reader, BodyRecognizeLimit), detectBuf)
|
||||
token, err := xml.NewDecoder(detectReader).RawToken()
|
||||
func DetectXML(data []byte) string {
|
||||
token, err := xml.NewDecoder(bytes.NewReader(data)).RawToken()
|
||||
if err != nil {
|
||||
var xmlErr *xml.SyntaxError
|
||||
if errors.Is(err, io.EOF) || errors.As(err, &xmlErr) {
|
||||
return false, detectBuf.Bytes(), nil
|
||||
}
|
||||
return false, detectBuf.Bytes(), err
|
||||
return nonXML
|
||||
}
|
||||
|
||||
switch token.(type) {
|
||||
case xml.StartElement, xml.ProcInst:
|
||||
return true, detectBuf.Bytes(), nil
|
||||
return typeXML
|
||||
}
|
||||
return false, detectBuf.Bytes(), nil
|
||||
return nonXML
|
||||
}
|
||||
|
||||
func ChooseWriter(isXML bool, bodyWriter io.Writer) io.WriteCloser {
|
||||
func ChooseWriter(dataType string, bodyWriter io.Writer) io.WriteCloser {
|
||||
writeCloser := nopCloseWriter{bodyWriter}
|
||||
if !isXML {
|
||||
return base64.NewEncoder(base64.StdEncoding, bodyWriter)
|
||||
}
|
||||
if dataType == typeXML {
|
||||
return writeCloser
|
||||
}
|
||||
return base64.NewEncoder(base64.StdEncoding, bodyWriter)
|
||||
}
|
||||
|
||||
func ChooseReader(isXML bool, bodyReader io.Reader) io.Reader {
|
||||
if !isXML {
|
||||
return base64.NewDecoder(base64.StdEncoding, bodyReader)
|
||||
}
|
||||
func ChooseReader(dataType string, bodyReader io.Reader) io.Reader {
|
||||
if dataType == typeXML {
|
||||
return bodyReader
|
||||
}
|
||||
return base64.NewDecoder(base64.StdEncoding, bodyReader)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue