http: Fix handling of directories with & in

This was caused by inconsistent escaping of the URL in the prefix
check, so check the URL links back to the correct host and scheme
instead of the prefix check.

The decoded path check will catch any URLs which are outside of the
root.
This commit is contained in:
Nick Craig-Wood 2018-02-14 11:26:37 +00:00
parent 675e7c5d8e
commit 644313a4b9
2 changed files with 53 additions and 34 deletions

View file

@ -200,37 +200,52 @@ func parseInt64(s string) int64 {
return n return n
} }
// parseName turns a name as found in the page into a remote path or returns false // Errors returned by parseName
func parseName(base *url.URL, name string) (string, bool) { var (
errURLJoinFailed = errors.New("URLJoin failed")
errFoundQuestionMark = errors.New("found ? in URL")
errHostMismatch = errors.New("host mismatch")
errSchemeMismatch = errors.New("scheme mismatch")
errNotUnderRoot = errors.New("not under root")
errNameIsEmpty = errors.New("name is empty")
errNameContainsSlash = errors.New("name contains /")
)
// parseName turns a name as found in the page into a remote path or returns an error
func parseName(base *url.URL, name string) (string, error) {
// make URL absolute
u, err := rest.URLJoin(base, name) u, err := rest.URLJoin(base, name)
if err != nil { if err != nil {
return "", false return "", errURLJoinFailed
} }
// check it doesn't have URL parameters
uStr := u.String() uStr := u.String()
if strings.Index(uStr, "?") >= 0 { if strings.Index(uStr, "?") >= 0 {
return "", false return "", errFoundQuestionMark
} }
baseStr := base.String() // check that this is going back to the same host and scheme
// check has URL prefix if base.Host != u.Host {
if !strings.HasPrefix(uStr, baseStr) { return "", errHostMismatch
return "", false }
if base.Scheme != u.Scheme {
return "", errSchemeMismatch
} }
// check has path prefix // check has path prefix
if !strings.HasPrefix(u.Path, base.Path) { if !strings.HasPrefix(u.Path, base.Path) {
return "", false return "", errNotUnderRoot
} }
// calculate the name relative to the base // calculate the name relative to the base
name = u.Path[len(base.Path):] name = u.Path[len(base.Path):]
// musn't be empty // musn't be empty
if name == "" { if name == "" {
return "", false return "", errNameIsEmpty
} }
// mustn't contain a / // mustn't contain a / - we are looking for a single level directory
slash := strings.Index(name, "/") slash := strings.Index(name, "/")
if slash >= 0 && slash != len(name)-1 { if slash >= 0 && slash != len(name)-1 {
return "", false return "", errNameContainsSlash
} }
return name, true return name, nil
} }
// Parse turns HTML for a directory into names // Parse turns HTML for a directory into names
@ -245,8 +260,8 @@ func parse(base *url.URL, in io.Reader) (names []string, err error) {
if n.Type == html.ElementNode && n.Data == "a" { if n.Type == html.ElementNode && n.Data == "a" {
for _, a := range n.Attr { for _, a := range n.Attr {
if a.Key == "href" { if a.Key == "href" {
name, ok := parseName(base, a.Val) name, err := parseName(base, a.Val)
if ok { if err == nil {
names = append(names, name) names = append(names, name)
} }
break break

View file

@ -209,28 +209,32 @@ func TestParseName(t *testing.T) {
for i, test := range []struct { for i, test := range []struct {
base string base string
val string val string
wantOK bool wantErr error
want string want string
}{ }{
{"http://example.com/", "potato", true, "potato"}, {"http://example.com/", "potato", nil, "potato"},
{"http://example.com/dir/", "potato", true, "potato"}, {"http://example.com/dir/", "potato", nil, "potato"},
{"http://example.com/dir/", "../dir/potato", true, "potato"}, {"http://example.com/dir/", "potato?download=true", errFoundQuestionMark, ""},
{"http://example.com/dir/", "..", false, ""}, {"http://example.com/dir/", "../dir/potato", nil, "potato"},
{"http://example.com/dir/", "http://example.com/", false, ""}, {"http://example.com/dir/", "..", errNotUnderRoot, ""},
{"http://example.com/dir/", "http://example.com/dir/", false, ""}, {"http://example.com/dir/", "http://example.com/", errNotUnderRoot, ""},
{"http://example.com/dir/", "http://example.com/dir/potato", true, "potato"}, {"http://example.com/dir/", "http://example.com/dir/", errNameIsEmpty, ""},
{"http://example.com/dir/", "/dir/", false, ""}, {"http://example.com/dir/", "http://example.com/dir/potato", nil, "potato"},
{"http://example.com/dir/", "/dir/potato", true, "potato"}, {"http://example.com/dir/", "https://example.com/dir/potato", errSchemeMismatch, ""},
{"http://example.com/dir/", "subdir/potato", false, ""}, {"http://example.com/dir/", "http://notexample.com/dir/potato", errHostMismatch, ""},
{"http://example.com/dir/", "With percent %25.txt", true, "With percent %.txt"}, {"http://example.com/dir/", "/dir/", errNameIsEmpty, ""},
{"http://example.com/dir/", "With colon :", false, ""}, {"http://example.com/dir/", "/dir/potato", nil, "potato"},
{"http://example.com/dir/", rest.URLPathEscape("With colon :"), true, "With colon :"}, {"http://example.com/dir/", "subdir/potato", errNameContainsSlash, ""},
{"http://example.com/dir/", "With percent %25.txt", nil, "With percent %.txt"},
{"http://example.com/dir/", "With colon :", errURLJoinFailed, ""},
{"http://example.com/dir/", rest.URLPathEscape("With colon :"), nil, "With colon :"},
{"http://example.com/Dungeons%20%26%20Dragons/", "/Dungeons%20&%20Dragons/D%26D%20Basic%20%28Holmes%2C%20B%2C%20X%2C%20BECMI%29/", nil, "D&D Basic (Holmes, B, X, BECMI)/"},
} { } {
u, err := url.Parse(test.base) u, err := url.Parse(test.base)
require.NoError(t, err) require.NoError(t, err)
got, gotOK := parseName(u, test.val) got, gotErr := parseName(u, test.val)
what := fmt.Sprintf("test %d base=%q, val=%q", i, test.base, test.val) what := fmt.Sprintf("test %d base=%q, val=%q", i, test.base, test.val)
assert.Equal(t, test.wantOK, gotOK, what) assert.Equal(t, test.wantErr, gotErr, what)
assert.Equal(t, test.want, got, what) assert.Equal(t, test.want, got, what)
} }
} }