http: Fix handling of directories with & in

This was caused by inconsistent escaping of the URL in the prefix
check, so check the URL links back to the correct host and scheme
instead of the prefix check.

The decoded path check will catch any URLs which are outside of the
root.
This commit is contained in:
Nick Craig-Wood 2018-02-14 11:26:37 +00:00
parent 675e7c5d8e
commit 644313a4b9
2 changed files with 53 additions and 34 deletions

View file

@ -200,37 +200,52 @@ func parseInt64(s string) int64 {
return n
}
// parseName turns a name as found in the page into a remote path or returns false
func parseName(base *url.URL, name string) (string, bool) {
// Errors returned by parseName
var (
errURLJoinFailed = errors.New("URLJoin failed")
errFoundQuestionMark = errors.New("found ? in URL")
errHostMismatch = errors.New("host mismatch")
errSchemeMismatch = errors.New("scheme mismatch")
errNotUnderRoot = errors.New("not under root")
errNameIsEmpty = errors.New("name is empty")
errNameContainsSlash = errors.New("name contains /")
)
// parseName turns a name as found in the page into a remote path or returns an error
func parseName(base *url.URL, name string) (string, error) {
// make URL absolute
u, err := rest.URLJoin(base, name)
if err != nil {
return "", false
return "", errURLJoinFailed
}
// check it doesn't have URL parameters
uStr := u.String()
if strings.Index(uStr, "?") >= 0 {
return "", false
return "", errFoundQuestionMark
}
baseStr := base.String()
// check has URL prefix
if !strings.HasPrefix(uStr, baseStr) {
return "", false
// check that this is going back to the same host and scheme
if base.Host != u.Host {
return "", errHostMismatch
}
if base.Scheme != u.Scheme {
return "", errSchemeMismatch
}
// check has path prefix
if !strings.HasPrefix(u.Path, base.Path) {
return "", false
return "", errNotUnderRoot
}
// calculate the name relative to the base
name = u.Path[len(base.Path):]
// musn't be empty
if name == "" {
return "", false
return "", errNameIsEmpty
}
// mustn't contain a /
// mustn't contain a / - we are looking for a single level directory
slash := strings.Index(name, "/")
if slash >= 0 && slash != len(name)-1 {
return "", false
return "", errNameContainsSlash
}
return name, true
return name, nil
}
// Parse turns HTML for a directory into names
@ -245,8 +260,8 @@ func parse(base *url.URL, in io.Reader) (names []string, err error) {
if n.Type == html.ElementNode && n.Data == "a" {
for _, a := range n.Attr {
if a.Key == "href" {
name, ok := parseName(base, a.Val)
if ok {
name, err := parseName(base, a.Val)
if err == nil {
names = append(names, name)
}
break

View file

@ -209,28 +209,32 @@ func TestParseName(t *testing.T) {
for i, test := range []struct {
base string
val string
wantOK bool
wantErr error
want string
}{
{"http://example.com/", "potato", true, "potato"},
{"http://example.com/dir/", "potato", true, "potato"},
{"http://example.com/dir/", "../dir/potato", true, "potato"},
{"http://example.com/dir/", "..", false, ""},
{"http://example.com/dir/", "http://example.com/", false, ""},
{"http://example.com/dir/", "http://example.com/dir/", false, ""},
{"http://example.com/dir/", "http://example.com/dir/potato", true, "potato"},
{"http://example.com/dir/", "/dir/", false, ""},
{"http://example.com/dir/", "/dir/potato", true, "potato"},
{"http://example.com/dir/", "subdir/potato", false, ""},
{"http://example.com/dir/", "With percent %25.txt", true, "With percent %.txt"},
{"http://example.com/dir/", "With colon :", false, ""},
{"http://example.com/dir/", rest.URLPathEscape("With colon :"), true, "With colon :"},
{"http://example.com/", "potato", nil, "potato"},
{"http://example.com/dir/", "potato", nil, "potato"},
{"http://example.com/dir/", "potato?download=true", errFoundQuestionMark, ""},
{"http://example.com/dir/", "../dir/potato", nil, "potato"},
{"http://example.com/dir/", "..", errNotUnderRoot, ""},
{"http://example.com/dir/", "http://example.com/", errNotUnderRoot, ""},
{"http://example.com/dir/", "http://example.com/dir/", errNameIsEmpty, ""},
{"http://example.com/dir/", "http://example.com/dir/potato", nil, "potato"},
{"http://example.com/dir/", "https://example.com/dir/potato", errSchemeMismatch, ""},
{"http://example.com/dir/", "http://notexample.com/dir/potato", errHostMismatch, ""},
{"http://example.com/dir/", "/dir/", errNameIsEmpty, ""},
{"http://example.com/dir/", "/dir/potato", nil, "potato"},
{"http://example.com/dir/", "subdir/potato", errNameContainsSlash, ""},
{"http://example.com/dir/", "With percent %25.txt", nil, "With percent %.txt"},
{"http://example.com/dir/", "With colon :", errURLJoinFailed, ""},
{"http://example.com/dir/", rest.URLPathEscape("With colon :"), nil, "With colon :"},
{"http://example.com/Dungeons%20%26%20Dragons/", "/Dungeons%20&%20Dragons/D%26D%20Basic%20%28Holmes%2C%20B%2C%20X%2C%20BECMI%29/", nil, "D&D Basic (Holmes, B, X, BECMI)/"},
} {
u, err := url.Parse(test.base)
require.NoError(t, err)
got, gotOK := parseName(u, test.val)
got, gotErr := parseName(u, test.val)
what := fmt.Sprintf("test %d base=%q, val=%q", i, test.base, test.val)
assert.Equal(t, test.wantOK, gotOK, what)
assert.Equal(t, test.wantErr, gotErr, what)
assert.Equal(t, test.want, got, what)
}
}