[#188] Improve content-type detector
Signed-off-by: Aleksey Kravchenko <al.kravchenko@yadro.com>
This commit is contained in:
parent
a4e3767d4b
commit
a1097b7c59
3 changed files with 81 additions and 20 deletions
|
@ -45,7 +45,11 @@ func (h *Handler) headObject(ctx context.Context, req request, objectAddress oid
|
||||||
}
|
}
|
||||||
|
|
||||||
req.Response.Header.Set(fasthttp.HeaderContentLength, strconv.FormatUint(obj.PayloadSize(), 10))
|
req.Response.Header.Set(fasthttp.HeaderContentLength, strconv.FormatUint(obj.PayloadSize(), 10))
|
||||||
var contentType string
|
var (
|
||||||
|
contentType string
|
||||||
|
filename string
|
||||||
|
filepath string
|
||||||
|
)
|
||||||
for _, attr := range obj.Attributes() {
|
for _, attr := range obj.Attributes() {
|
||||||
key := attr.Key()
|
key := attr.Key()
|
||||||
val := attr.Value()
|
val := attr.Value()
|
||||||
|
@ -69,8 +73,15 @@ func (h *Handler) headObject(ctx context.Context, req request, objectAddress oid
|
||||||
req.Response.Header.Set(fasthttp.HeaderLastModified, time.Unix(value, 0).UTC().Format(http.TimeFormat))
|
req.Response.Header.Set(fasthttp.HeaderLastModified, time.Unix(value, 0).UTC().Format(http.TimeFormat))
|
||||||
case object.AttributeContentType:
|
case object.AttributeContentType:
|
||||||
contentType = val
|
contentType = val
|
||||||
|
case object.AttributeFilePath:
|
||||||
|
filepath = val
|
||||||
|
case object.AttributeFileName:
|
||||||
|
filename = val
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if filename == "" {
|
||||||
|
filename = filepath
|
||||||
|
}
|
||||||
|
|
||||||
idsToResponse(&req.Response, obj)
|
idsToResponse(&req.Response, obj)
|
||||||
|
|
||||||
|
@ -85,7 +96,7 @@ func (h *Handler) headObject(ctx context.Context, req request, objectAddress oid
|
||||||
}
|
}
|
||||||
|
|
||||||
return h.frostfs.RangeObject(ctx, prmRange)
|
return h.frostfs.RangeObject(ctx, prmRange)
|
||||||
})
|
}, filename)
|
||||||
if err != nil && err != io.EOF {
|
if err != nil && err != io.EOF {
|
||||||
req.handleFrostFSErr(err, start)
|
req.handleFrostFSErr(err, start)
|
||||||
return
|
return
|
||||||
|
|
|
@ -4,9 +4,11 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"io"
|
"io"
|
||||||
|
"mime"
|
||||||
"net/http"
|
"net/http"
|
||||||
"path"
|
"path"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs"
|
"git.frostfs.info/TrueCloudLab/frostfs-http-gw/internal/logs"
|
||||||
|
@ -25,7 +27,7 @@ type readCloser struct {
|
||||||
|
|
||||||
// initializes io.Reader with the limited size and detects Content-Type from it.
|
// initializes io.Reader with the limited size and detects Content-Type from it.
|
||||||
// Returns r's error directly. Also returns the processed data.
|
// Returns r's error directly. Also returns the processed data.
|
||||||
func readContentType(maxSize uint64, rInit func(uint64) (io.Reader, error)) (string, []byte, error) {
|
func readContentType(maxSize uint64, rInit func(uint64) (io.Reader, error), filename string) (string, []byte, error) {
|
||||||
if maxSize > sizeToDetectType {
|
if maxSize > sizeToDetectType {
|
||||||
maxSize = sizeToDetectType
|
maxSize = sizeToDetectType
|
||||||
}
|
}
|
||||||
|
@ -44,7 +46,20 @@ func readContentType(maxSize uint64, rInit func(uint64) (io.Reader, error)) (str
|
||||||
|
|
||||||
buf = buf[:n]
|
buf = buf[:n]
|
||||||
|
|
||||||
return http.DetectContentType(buf), buf, err // to not lose io.EOF
|
contentType := http.DetectContentType(buf)
|
||||||
|
|
||||||
|
// http.DetectContentType reports "text/plain" for many kinds of text files,
|
||||||
|
// including CSS, JavaScript, and CSV files, so for that result
|
||||||
|
// the final content type is determined from the file's extension instead.
|
||||||
|
if strings.HasPrefix(contentType, "text/plain") {
|
||||||
|
ext := path.Ext(filename)
|
||||||
|
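// If the file has no extension, keep the detected content type as is. NOTE(review): an unrecognized extension makes mime.TypeByExtension return "", which replaces the detected type with an empty string — confirm this is intended.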
|
||||||
|
if len(ext) > 0 {
|
||||||
|
contentType = mime.TypeByExtension(ext)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return contentType, buf, err // to not lose io.EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
type getMultiobjectBodyParams struct {
|
type getMultiobjectBodyParams struct {
|
||||||
|
@ -128,7 +143,7 @@ func (h *Handler) receiveFile(ctx context.Context, req request, objAddress oid.A
|
||||||
|
|
||||||
contentType, payloadHead, err = readContentType(payloadSize, func(uint64) (io.Reader, error) {
|
contentType, payloadHead, err = readContentType(payloadSize, func(uint64) (io.Reader, error) {
|
||||||
return payload, nil
|
return payload, nil
|
||||||
})
|
}, filename)
|
||||||
if err != nil && err != io.EOF {
|
if err != nil && err != io.EOF {
|
||||||
req.log.Error(logs.CouldNotDetectContentTypeFromPayload, zap.Error(err))
|
req.log.Error(logs.CouldNotDetectContentTypeFromPayload, zap.Error(err))
|
||||||
response.Error(req.RequestCtx, "could not detect Content-Type from payload: "+err.Error(), fasthttp.StatusBadRequest)
|
response.Error(req.RequestCtx, "could not detect Content-Type from payload: "+err.Error(), fasthttp.StatusBadRequest)
|
||||||
|
|
|
@ -10,39 +10,74 @@ import (
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
txtContentType = "text/plain; charset=utf-8"
|
||||||
|
cssContentType = "text/css; charset=utf-8"
|
||||||
|
htmlContentType = "text/html; charset=utf-8"
|
||||||
|
javascriptContentType = "text/javascript; charset=utf-8"
|
||||||
|
|
||||||
|
htmlBody = "<!DOCTYPE html><html ><head><meta charset=\"utf-8\"><title>Test Html</title>"
|
||||||
|
)
|
||||||
|
|
||||||
func TestDetector(t *testing.T) {
|
func TestDetector(t *testing.T) {
|
||||||
txtContentType := "text/plain; charset=utf-8"
|
|
||||||
sb := strings.Builder{}
|
sb := strings.Builder{}
|
||||||
for i := 0; i < 10; i++ {
|
for i := 0; i < 10; i++ {
|
||||||
sb.WriteString("Some txt content. Content-Type must be detected properly by detector.")
|
sb.WriteString("Some txt content. Content-Type must be detected properly by detector.")
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range []struct {
|
for _, tc := range []struct {
|
||||||
Name string
|
Name string
|
||||||
ContentType string
|
ExpectedContentType string
|
||||||
Expected string
|
Content string
|
||||||
|
FileName string
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
Name: "less than 512b",
|
Name: "less than 512b",
|
||||||
ContentType: txtContentType,
|
ExpectedContentType: txtContentType,
|
||||||
Expected: sb.String()[:256],
|
Content: sb.String()[:256],
|
||||||
|
FileName: "test.txt",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "more than 512b",
|
Name: "more than 512b",
|
||||||
ContentType: txtContentType,
|
ExpectedContentType: txtContentType,
|
||||||
Expected: sb.String(),
|
Content: sb.String(),
|
||||||
|
FileName: "test.txt",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "css content type",
|
||||||
|
ExpectedContentType: cssContentType,
|
||||||
|
Content: sb.String(),
|
||||||
|
FileName: "test.css",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "javascript content type",
|
||||||
|
ExpectedContentType: javascriptContentType,
|
||||||
|
Content: sb.String(),
|
||||||
|
FileName: "test.js",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "html content type by file content",
|
||||||
|
ExpectedContentType: htmlContentType,
|
||||||
|
Content: htmlBody,
|
||||||
|
FileName: "test.detect-by-content",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "html content type by file extension",
|
||||||
|
ExpectedContentType: htmlContentType,
|
||||||
|
Content: htmlBody,
|
||||||
|
FileName: "test.html",
|
||||||
},
|
},
|
||||||
} {
|
} {
|
||||||
t.Run(tc.Name, func(t *testing.T) {
|
t.Run(tc.Name, func(t *testing.T) {
|
||||||
contentType, data, err := readContentType(uint64(len(tc.Expected)),
|
contentType, data, err := readContentType(uint64(len(tc.Content)),
|
||||||
func(uint64) (io.Reader, error) {
|
func(uint64) (io.Reader, error) {
|
||||||
return strings.NewReader(tc.Expected), nil
|
return strings.NewReader(tc.Content), nil
|
||||||
},
|
}, tc.FileName,
|
||||||
)
|
)
|
||||||
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Equal(t, tc.ContentType, contentType)
|
require.Equal(t, tc.ExpectedContentType, contentType)
|
||||||
require.True(t, strings.HasPrefix(tc.Expected, string(data)))
|
require.True(t, strings.HasPrefix(tc.Content, string(data)))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue