[#188] Improve content-type detector
Signed-off-by: Aleksey Kravchenko <al.kravchenko@yadro.com>
This commit is contained in:
parent
a4e3767d4b
commit
a1097b7c59
3 changed files with 81 additions and 20 deletions
|
@ -45,7 +45,11 @@ func (h *Handler) headObject(ctx context.Context, req request, objectAddress oid
|
|||
}
|
||||
|
||||
req.Response.Header.Set(fasthttp.HeaderContentLength, strconv.FormatUint(obj.PayloadSize(), 10))
|
||||
var contentType string
|
||||
var (
|
||||
contentType string
|
||||
filename string
|
||||
filepath string
|
||||
)
|
||||
for _, attr := range obj.Attributes() {
|
||||
key := attr.Key()
|
||||
val := attr.Value()
|
||||
|
@ -69,8 +73,15 @@ func (h *Handler) headObject(ctx context.Context, req request, objectAddress oid
|
|||
req.Response.Header.Set(fasthttp.HeaderLastModified, time.Unix(value, 0).UTC().Format(http.TimeFormat))
|
||||
case object.AttributeContentType:
|
||||
contentType = val
|
||||
case object.AttributeFilePath:
|
||||
filepath = val
|
||||
case object.AttributeFileName:
|
||||
filename = val
|
||||
}
|
||||
}
|
||||
if filename == "" {
|
||||
filename = filepath
|
||||
}
|
||||
|
||||
idsToResponse(&req.Response, obj)
|
||||
|
||||
|
@ -85,7 +96,7 @@ func (h *Handler) headObject(ctx context.Context, req request, objectAddress oid
|
|||
}
|
||||
|
||||
return h.frostfs.RangeObject(ctx, prmRange)
|
||||
})
|
||||
}, filename)
|
||||
if err != nil && err != io.EOF {
|
||||
req.handleFrostFSErr(err, start)
|
||||
return
|
||||
|
|
|
@ -4,9 +4,11 @@ import (
|
|||
"bytes"
|
||||
"context"
|
||||
"io"
|
||||
"mime"
|
||||
"net/http"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs"
|
||||
|
@ -25,7 +27,7 @@ type readCloser struct {
|
|||
|
||||
// readContentType initializes an io.Reader with a limited size (at most sizeToDetectType bytes) and detects the Content-Type from it.
|
||||
// Returns the reader's error directly (including io.EOF). Also returns the data that was read for detection.
|
||||
func readContentType(maxSize uint64, rInit func(uint64) (io.Reader, error)) (string, []byte, error) {
|
||||
func readContentType(maxSize uint64, rInit func(uint64) (io.Reader, error), filename string) (string, []byte, error) {
|
||||
if maxSize > sizeToDetectType {
|
||||
maxSize = sizeToDetectType
|
||||
}
|
||||
|
@ -44,7 +46,20 @@ func readContentType(maxSize uint64, rInit func(uint64) (io.Reader, error)) (str
|
|||
|
||||
buf = buf[:n]
|
||||
|
||||
return http.DetectContentType(buf), buf, err // to not lose io.EOF
|
||||
contentType := http.DetectContentType(buf)
|
||||
|
||||
// Since the detector detects the "text/plain" content type for various types of text files,
|
||||
// including CSS, JavaScript, and CSV files,
|
||||
// we'll determine the final content type based on the file's extension.
|
||||
if strings.HasPrefix(contentType, "text/plain") {
|
||||
ext := path.Ext(filename)
|
||||
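// If the file doesn't have a file extension, we'll keep the content type as is.
// NOTE(review): mime.TypeByExtension returns "" for an extension it does not know,
// so an unrecognized extension replaces the detected "text/plain" with an empty content type.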
|
||||
if len(ext) > 0 {
|
||||
contentType = mime.TypeByExtension(ext)
|
||||
}
|
||||
}
|
||||
|
||||
return contentType, buf, err // to not lose io.EOF
|
||||
}
|
||||
|
||||
type getMultiobjectBodyParams struct {
|
||||
|
@ -128,7 +143,7 @@ func (h *Handler) receiveFile(ctx context.Context, req request, objAddress oid.A
|
|||
|
||||
contentType, payloadHead, err = readContentType(payloadSize, func(uint64) (io.Reader, error) {
|
||||
return payload, nil
|
||||
})
|
||||
}, filename)
|
||||
if err != nil && err != io.EOF {
|
||||
req.log.Error(logs.CouldNotDetectContentTypeFromPayload, zap.Error(err))
|
||||
response.Error(req.RequestCtx, "could not detect Content-Type from payload: "+err.Error(), fasthttp.StatusBadRequest)
|
||||
|
|
|
@ -10,39 +10,74 @@ import (
|
|||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
const (
|
||||
txtContentType = "text/plain; charset=utf-8"
|
||||
cssContentType = "text/css; charset=utf-8"
|
||||
htmlContentType = "text/html; charset=utf-8"
|
||||
javascriptContentType = "text/javascript; charset=utf-8"
|
||||
|
||||
htmlBody = "<!DOCTYPE html><html ><head><meta charset=\"utf-8\"><title>Test Html</title>"
|
||||
)
|
||||
|
||||
func TestDetector(t *testing.T) {
|
||||
txtContentType := "text/plain; charset=utf-8"
|
||||
sb := strings.Builder{}
|
||||
for i := 0; i < 10; i++ {
|
||||
sb.WriteString("Some txt content. Content-Type must be detected properly by detector.")
|
||||
}
|
||||
|
||||
for _, tc := range []struct {
|
||||
Name string
|
||||
ContentType string
|
||||
Expected string
|
||||
Name string
|
||||
ExpectedContentType string
|
||||
Content string
|
||||
FileName string
|
||||
}{
|
||||
{
|
||||
Name: "less than 512b",
|
||||
ContentType: txtContentType,
|
||||
Expected: sb.String()[:256],
|
||||
Name: "less than 512b",
|
||||
ExpectedContentType: txtContentType,
|
||||
Content: sb.String()[:256],
|
||||
FileName: "test.txt",
|
||||
},
|
||||
{
|
||||
Name: "more than 512b",
|
||||
ContentType: txtContentType,
|
||||
Expected: sb.String(),
|
||||
Name: "more than 512b",
|
||||
ExpectedContentType: txtContentType,
|
||||
Content: sb.String(),
|
||||
FileName: "test.txt",
|
||||
},
|
||||
{
|
||||
Name: "css content type",
|
||||
ExpectedContentType: cssContentType,
|
||||
Content: sb.String(),
|
||||
FileName: "test.css",
|
||||
},
|
||||
{
|
||||
Name: "javascript content type",
|
||||
ExpectedContentType: javascriptContentType,
|
||||
Content: sb.String(),
|
||||
FileName: "test.js",
|
||||
},
|
||||
{
|
||||
Name: "html content type by file content",
|
||||
ExpectedContentType: htmlContentType,
|
||||
Content: htmlBody,
|
||||
FileName: "test.detect-by-content",
|
||||
},
|
||||
{
|
||||
Name: "html content type by file extension",
|
||||
ExpectedContentType: htmlContentType,
|
||||
Content: htmlBody,
|
||||
FileName: "test.html",
|
||||
},
|
||||
} {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
contentType, data, err := readContentType(uint64(len(tc.Expected)),
|
||||
contentType, data, err := readContentType(uint64(len(tc.Content)),
|
||||
func(uint64) (io.Reader, error) {
|
||||
return strings.NewReader(tc.Expected), nil
|
||||
},
|
||||
return strings.NewReader(tc.Content), nil
|
||||
}, tc.FileName,
|
||||
)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, tc.ContentType, contentType)
|
||||
require.True(t, strings.HasPrefix(tc.Expected, string(data)))
|
||||
require.Equal(t, tc.ExpectedContentType, contentType)
|
||||
require.True(t, strings.HasPrefix(tc.Content, string(data)))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue