http: Fix handling of directories with & in
This was caused by inconsistent escaping of the URL in the prefix check, so check the URL links back to the correct host and scheme instead of the prefix check. The decoded path check will catch any URLs which are outside of the root.
This commit is contained in:
parent
675e7c5d8e
commit
644313a4b9
2 changed files with 53 additions and 34 deletions
|
@ -200,37 +200,52 @@ func parseInt64(s string) int64 {
|
|||
return n
|
||||
}
|
||||
|
||||
// parseName turns a name as found in the page into a remote path or returns false
|
||||
func parseName(base *url.URL, name string) (string, bool) {
|
||||
// Errors returned by parseName
|
||||
var (
|
||||
errURLJoinFailed = errors.New("URLJoin failed")
|
||||
errFoundQuestionMark = errors.New("found ? in URL")
|
||||
errHostMismatch = errors.New("host mismatch")
|
||||
errSchemeMismatch = errors.New("scheme mismatch")
|
||||
errNotUnderRoot = errors.New("not under root")
|
||||
errNameIsEmpty = errors.New("name is empty")
|
||||
errNameContainsSlash = errors.New("name contains /")
|
||||
)
|
||||
|
||||
// parseName turns a name as found in the page into a remote path or returns an error
|
||||
func parseName(base *url.URL, name string) (string, error) {
|
||||
// make URL absolute
|
||||
u, err := rest.URLJoin(base, name)
|
||||
if err != nil {
|
||||
return "", false
|
||||
return "", errURLJoinFailed
|
||||
}
|
||||
// check it doesn't have URL parameters
|
||||
uStr := u.String()
|
||||
if strings.Index(uStr, "?") >= 0 {
|
||||
return "", false
|
||||
return "", errFoundQuestionMark
|
||||
}
|
||||
baseStr := base.String()
|
||||
// check has URL prefix
|
||||
if !strings.HasPrefix(uStr, baseStr) {
|
||||
return "", false
|
||||
// check that this is going back to the same host and scheme
|
||||
if base.Host != u.Host {
|
||||
return "", errHostMismatch
|
||||
}
|
||||
if base.Scheme != u.Scheme {
|
||||
return "", errSchemeMismatch
|
||||
}
|
||||
// check has path prefix
|
||||
if !strings.HasPrefix(u.Path, base.Path) {
|
||||
return "", false
|
||||
return "", errNotUnderRoot
|
||||
}
|
||||
// calculate the name relative to the base
|
||||
name = u.Path[len(base.Path):]
|
||||
// musn't be empty
|
||||
if name == "" {
|
||||
return "", false
|
||||
return "", errNameIsEmpty
|
||||
}
|
||||
// mustn't contain a /
|
||||
// mustn't contain a / - we are looking for a single level directory
|
||||
slash := strings.Index(name, "/")
|
||||
if slash >= 0 && slash != len(name)-1 {
|
||||
return "", false
|
||||
return "", errNameContainsSlash
|
||||
}
|
||||
return name, true
|
||||
return name, nil
|
||||
}
|
||||
|
||||
// Parse turns HTML for a directory into names
|
||||
|
@ -245,8 +260,8 @@ func parse(base *url.URL, in io.Reader) (names []string, err error) {
|
|||
if n.Type == html.ElementNode && n.Data == "a" {
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == "href" {
|
||||
name, ok := parseName(base, a.Val)
|
||||
if ok {
|
||||
name, err := parseName(base, a.Val)
|
||||
if err == nil {
|
||||
names = append(names, name)
|
||||
}
|
||||
break
|
||||
|
|
|
@ -209,28 +209,32 @@ func TestParseName(t *testing.T) {
|
|||
for i, test := range []struct {
|
||||
base string
|
||||
val string
|
||||
wantOK bool
|
||||
wantErr error
|
||||
want string
|
||||
}{
|
||||
{"http://example.com/", "potato", true, "potato"},
|
||||
{"http://example.com/dir/", "potato", true, "potato"},
|
||||
{"http://example.com/dir/", "../dir/potato", true, "potato"},
|
||||
{"http://example.com/dir/", "..", false, ""},
|
||||
{"http://example.com/dir/", "http://example.com/", false, ""},
|
||||
{"http://example.com/dir/", "http://example.com/dir/", false, ""},
|
||||
{"http://example.com/dir/", "http://example.com/dir/potato", true, "potato"},
|
||||
{"http://example.com/dir/", "/dir/", false, ""},
|
||||
{"http://example.com/dir/", "/dir/potato", true, "potato"},
|
||||
{"http://example.com/dir/", "subdir/potato", false, ""},
|
||||
{"http://example.com/dir/", "With percent %25.txt", true, "With percent %.txt"},
|
||||
{"http://example.com/dir/", "With colon :", false, ""},
|
||||
{"http://example.com/dir/", rest.URLPathEscape("With colon :"), true, "With colon :"},
|
||||
{"http://example.com/", "potato", nil, "potato"},
|
||||
{"http://example.com/dir/", "potato", nil, "potato"},
|
||||
{"http://example.com/dir/", "potato?download=true", errFoundQuestionMark, ""},
|
||||
{"http://example.com/dir/", "../dir/potato", nil, "potato"},
|
||||
{"http://example.com/dir/", "..", errNotUnderRoot, ""},
|
||||
{"http://example.com/dir/", "http://example.com/", errNotUnderRoot, ""},
|
||||
{"http://example.com/dir/", "http://example.com/dir/", errNameIsEmpty, ""},
|
||||
{"http://example.com/dir/", "http://example.com/dir/potato", nil, "potato"},
|
||||
{"http://example.com/dir/", "https://example.com/dir/potato", errSchemeMismatch, ""},
|
||||
{"http://example.com/dir/", "http://notexample.com/dir/potato", errHostMismatch, ""},
|
||||
{"http://example.com/dir/", "/dir/", errNameIsEmpty, ""},
|
||||
{"http://example.com/dir/", "/dir/potato", nil, "potato"},
|
||||
{"http://example.com/dir/", "subdir/potato", errNameContainsSlash, ""},
|
||||
{"http://example.com/dir/", "With percent %25.txt", nil, "With percent %.txt"},
|
||||
{"http://example.com/dir/", "With colon :", errURLJoinFailed, ""},
|
||||
{"http://example.com/dir/", rest.URLPathEscape("With colon :"), nil, "With colon :"},
|
||||
{"http://example.com/Dungeons%20%26%20Dragons/", "/Dungeons%20&%20Dragons/D%26D%20Basic%20%28Holmes%2C%20B%2C%20X%2C%20BECMI%29/", nil, "D&D Basic (Holmes, B, X, BECMI)/"},
|
||||
} {
|
||||
u, err := url.Parse(test.base)
|
||||
require.NoError(t, err)
|
||||
got, gotOK := parseName(u, test.val)
|
||||
got, gotErr := parseName(u, test.val)
|
||||
what := fmt.Sprintf("test %d base=%q, val=%q", i, test.base, test.val)
|
||||
assert.Equal(t, test.wantOK, gotOK, what)
|
||||
assert.Equal(t, test.wantErr, gotErr, what)
|
||||
assert.Equal(t, test.want, got, what)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue