forked from TrueCloudLab/frostfs-s3-gw
[#369] Modify data type detector
Signed-off-by: Nikita Zinkevich <n.zinkevich@yadro.com>
This commit is contained in:
parent
4177898f97
commit
4ff049f480
6 changed files with 50 additions and 52 deletions
|
@ -12,6 +12,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"mime"
|
"mime"
|
||||||
|
"net/http"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -21,6 +22,7 @@ import (
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/data"
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/data"
|
||||||
apiErrors "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/errors"
|
apiErrors "git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/errors"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs"
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs"
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client"
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client"
|
||||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||||
|
@ -237,11 +239,11 @@ func (n *layer) PutObject(ctx context.Context, p *PutObjectParams) (*data.Extend
|
||||||
if r != nil {
|
if r != nil {
|
||||||
if len(p.Header[api.ContentType]) == 0 {
|
if len(p.Header[api.ContentType]) == 0 {
|
||||||
if contentType := MimeByFilePath(p.Object); len(contentType) == 0 {
|
if contentType := MimeByFilePath(p.Object); len(contentType) == 0 {
|
||||||
d := newDetector(r)
|
d := detector.NewDetector(r, http.DetectContentType)
|
||||||
if contentType, err := d.Detect(); err == nil {
|
if contentType, err := d.Detect(); err == nil {
|
||||||
p.Header[api.ContentType] = contentType
|
p.Header[api.ContentType] = contentType
|
||||||
}
|
}
|
||||||
r = d.MultiReader()
|
r = d.RestoredReader()
|
||||||
} else {
|
} else {
|
||||||
p.Header[api.ContentType] = contentType
|
p.Header[api.ContentType] = contentType
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,6 +9,7 @@ import (
|
||||||
"net/url"
|
"net/url"
|
||||||
|
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs"
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/internal/logs"
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
"go.uber.org/zap/zapcore"
|
"go.uber.org/zap/zapcore"
|
||||||
|
@ -211,6 +212,7 @@ func (f *fileLogger) newLoggerConfig() zap.Config {
|
||||||
c.EncoderConfig.LevelKey = zapcore.OmitKey
|
c.EncoderConfig.LevelKey = zapcore.OmitKey
|
||||||
c.EncoderConfig.TimeKey = zapcore.OmitKey
|
c.EncoderConfig.TimeKey = zapcore.OmitKey
|
||||||
c.EncoderConfig.FunctionKey = zapcore.OmitKey
|
c.EncoderConfig.FunctionKey = zapcore.OmitKey
|
||||||
|
c.Sampling = nil
|
||||||
|
|
||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
|
@ -231,16 +233,13 @@ func getBody(httpBody io.ReadCloser, l *zap.Logger) []byte {
|
||||||
// processBody reads the body and base64-encodes it if it is not XML.
|
// processBody reads the body and base64-encodes it if it is not XML.
|
||||||
func processBody(bodyReader io.Reader) ([]byte, error) {
|
func processBody(bodyReader io.Reader) ([]byte, error) {
|
||||||
resultBody := &bytes.Buffer{}
|
resultBody := &bytes.Buffer{}
|
||||||
isXML, checkedBytes, err := utils.DetectXML(bodyReader)
|
detect := detector.NewDetector(bodyReader, utils.DetectXML)
|
||||||
|
dataType, err := detect.Detect()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
writer := utils.ChooseWriter(isXML, resultBody)
|
writer := utils.ChooseWriter(dataType, resultBody)
|
||||||
_, err = writer.Write(checkedBytes)
|
if _, err = io.Copy(writer, detect.RestoredReader()); err != nil {
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if _, err = io.Copy(writer, bodyReader); err != nil {
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if err = writer.Close(); err != nil {
|
if err = writer.Close(); err != nil {
|
||||||
|
|
|
@ -18,6 +18,7 @@ import (
|
||||||
|
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api"
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth"
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth"
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback"
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
|
@ -62,18 +63,15 @@ func logResponse(cmd *cobra.Command, r *http.Request, resp *http.Response) {
|
||||||
if resp.ContentLength == 0 {
|
if resp.ContentLength == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
isXML, checkBuf, err := utils.DetectXML(resp.Body)
|
detect := detector.NewDetector(resp.Body, utils.DetectXML)
|
||||||
|
dataType, err := detect.Detect()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cmd.Println(err.Error())
|
cmd.Println(err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
body := &bytes.Buffer{}
|
body := &bytes.Buffer{}
|
||||||
resultWriter := utils.ChooseWriter(isXML, body)
|
resultWriter := utils.ChooseWriter(dataType, body)
|
||||||
if _, err = resultWriter.Write(checkBuf); err != nil {
|
_, err = io.Copy(resultWriter, io.LimitReader(detect.RestoredReader(), viper.GetInt64(printResponseLimit)))
|
||||||
cmd.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
_, err = io.Copy(resultWriter, io.LimitReader(resp.Body, viper.GetInt64(printResponseLimit)))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cmd.Println(err)
|
cmd.Println(err)
|
||||||
return
|
return
|
||||||
|
|
|
@ -1,15 +1,15 @@
|
||||||
package layer
|
package detector
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type (
|
type (
|
||||||
detector struct {
|
Detector struct {
|
||||||
io.Reader
|
io.Reader
|
||||||
err error
|
err error
|
||||||
data []byte
|
data []byte
|
||||||
|
detectFunc func([]byte) string
|
||||||
}
|
}
|
||||||
errReader struct {
|
errReader struct {
|
||||||
data []byte
|
data []byte
|
||||||
|
@ -36,23 +36,24 @@ func (r *errReader) Read(b []byte) (int, error) {
|
||||||
return n, nil
|
return n, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func newDetector(reader io.Reader) *detector {
|
func NewDetector(reader io.Reader, detectFunc func([]byte) string) *Detector {
|
||||||
return &detector{
|
return &Detector{
|
||||||
data: make([]byte, contentTypeDetectSize),
|
data: make([]byte, contentTypeDetectSize),
|
||||||
Reader: reader,
|
Reader: reader,
|
||||||
|
detectFunc: detectFunc,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *detector) Detect() (string, error) {
|
func (d *Detector) Detect() (string, error) {
|
||||||
n, err := d.Reader.Read(d.data)
|
n, err := d.Reader.Read(d.data)
|
||||||
if err != nil && err != io.EOF {
|
if err != nil && err != io.EOF {
|
||||||
d.err = err
|
d.err = err
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
d.data = d.data[:n]
|
d.data = d.data[:n]
|
||||||
return http.DetectContentType(d.data), nil
|
return d.detectFunc(d.data), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *detector) MultiReader() io.Reader {
|
func (d *Detector) RestoredReader() io.Reader {
|
||||||
return io.MultiReader(newReader(d.data, d.err), d.Reader)
|
return io.MultiReader(newReader(d.data, d.err), d.Reader)
|
||||||
}
|
}
|
|
@ -14,6 +14,7 @@ import (
|
||||||
|
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api"
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth"
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/api/auth"
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/pkg/detector"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
|
"git.frostfs.info/TrueCloudLab/frostfs-s3-gw/playback/utils"
|
||||||
v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
|
v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
|
||||||
"github.com/aws/aws-sdk-go-v2/credentials"
|
"github.com/aws/aws-sdk-go-v2/credentials"
|
||||||
|
@ -49,11 +50,12 @@ func (h *httpBody) UnmarshalJSON(data []byte) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to unquote data: %w", err)
|
return fmt.Errorf("failed to unquote data: %w", err)
|
||||||
}
|
}
|
||||||
isXML, _, err := utils.DetectXML(strings.NewReader(unquoted))
|
detect := detector.NewDetector(strings.NewReader(unquoted), utils.DetectXML)
|
||||||
|
dataType, err := detect.Detect()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to detect httpbody type: %w", err)
|
return fmt.Errorf("failed to detect data: %w", err)
|
||||||
}
|
}
|
||||||
reader := utils.ChooseReader(isXML, strings.NewReader(unquoted))
|
reader := utils.ChooseReader(dataType, detect.RestoredReader())
|
||||||
*h, err = io.ReadAll(reader)
|
*h, err = io.ReadAll(reader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to unmarshal httpbody: %w", err)
|
return fmt.Errorf("failed to unmarshal httpbody: %w", err)
|
||||||
|
|
|
@ -4,7 +4,6 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
"encoding/xml"
|
"encoding/xml"
|
||||||
"errors"
|
|
||||||
"io"
|
"io"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -16,38 +15,35 @@ func (b nopCloseWriter) Close() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
const BodyRecognizeLimit int64 = 128
|
const (
|
||||||
|
nonXML = "nonXML"
|
||||||
|
typeXML = "application/XML"
|
||||||
|
)
|
||||||
|
|
||||||
func DetectXML(reader io.Reader) (bool, []byte, error) {
|
func DetectXML(data []byte) string {
|
||||||
detectBuf := bytes.NewBuffer(nil)
|
token, err := xml.NewDecoder(bytes.NewReader(data)).RawToken()
|
||||||
detectReader := io.TeeReader(io.LimitReader(reader, BodyRecognizeLimit), detectBuf)
|
|
||||||
token, err := xml.NewDecoder(detectReader).RawToken()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
var xmlErr *xml.SyntaxError
|
return nonXML
|
||||||
if errors.Is(err, io.EOF) || errors.As(err, &xmlErr) {
|
|
||||||
return false, detectBuf.Bytes(), nil
|
|
||||||
}
|
|
||||||
return false, detectBuf.Bytes(), err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
switch token.(type) {
|
switch token.(type) {
|
||||||
case xml.StartElement, xml.ProcInst:
|
case xml.StartElement, xml.ProcInst:
|
||||||
return true, detectBuf.Bytes(), nil
|
return typeXML
|
||||||
}
|
}
|
||||||
return false, detectBuf.Bytes(), nil
|
return nonXML
|
||||||
}
|
}
|
||||||
|
|
||||||
func ChooseWriter(isXML bool, bodyWriter io.Writer) io.WriteCloser {
|
func ChooseWriter(dataType string, bodyWriter io.Writer) io.WriteCloser {
|
||||||
writeCloser := nopCloseWriter{bodyWriter}
|
writeCloser := nopCloseWriter{bodyWriter}
|
||||||
if !isXML {
|
if dataType == typeXML {
|
||||||
return base64.NewEncoder(base64.StdEncoding, bodyWriter)
|
|
||||||
}
|
|
||||||
return writeCloser
|
return writeCloser
|
||||||
}
|
}
|
||||||
|
return base64.NewEncoder(base64.StdEncoding, bodyWriter)
|
||||||
func ChooseReader(isXML bool, bodyReader io.Reader) io.Reader {
|
|
||||||
if !isXML {
|
|
||||||
return base64.NewDecoder(base64.StdEncoding, bodyReader)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ChooseReader(dataType string, bodyReader io.Reader) io.Reader {
|
||||||
|
if dataType == typeXML {
|
||||||
return bodyReader
|
return bodyReader
|
||||||
}
|
}
|
||||||
|
return base64.NewDecoder(base64.StdEncoding, bodyReader)
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue