http: improved recognition of url pointing to a single file - fixes #5929

This commit is contained in:
albertony 2022-01-23 23:39:05 +01:00
parent 1045344943
commit a667e03fc9
3 changed files with 218 additions and 37 deletions

View file

@ -73,8 +73,9 @@ directories.`,
Advanced: true,
}, {
Name: "no_head",
Help: `Don't use HEAD requests to find file sizes in dir listing.
Help: `Don't use HEAD requests.
HEAD requests are mainly used to find file sizes in dir listing.
If your site is being very slow to load then you can try this option.
Normally rclone does a HEAD request for each potential file in a
directory listing to:
@ -134,6 +135,82 @@ func statusError(res *http.Response, err error) error {
return nil
}
// getFsEndpoint decides if url is to be considered a file or directory,
// and returns a proper endpoint url to use for the fs.
func getFsEndpoint(ctx context.Context, client *http.Client, url string, opt *Options) (string, bool) {
// If url ends with '/' it is already a proper url always assumed to be a directory.
if url[len(url)-1] == '/' {
return url, false
}
// If url does not end with '/' we send a HEAD request to decide
// if it is directory or file, and if directory appends the missing
// '/', or if file returns the directory url to parent instead.
createFileResult := func() (string, bool) {
fs.Debugf(nil, "If path is a directory you must add a trailing '/'")
parent, _ := path.Split(url)
return parent, true
}
createDirResult := func() (string, bool) {
fs.Debugf(nil, "To avoid the initial HEAD request add a trailing '/' to the path")
return url + "/", false
}
// If HEAD requests are not allowed we just have to assume it is a file.
if opt.NoHead {
fs.Debugf(nil, "Assuming path is a file as --http-no-head is set")
return createFileResult()
}
// Use a client which doesn't follow redirects so the server
// doesn't redirect http://host/dir to http://host/dir/
noRedir := *client
noRedir.CheckRedirect = func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
}
req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil)
if err != nil {
fs.Debugf(nil, "Assuming path is a file as HEAD request could not be created: %v", err)
return createFileResult()
}
addHeaders(req, opt)
res, err := noRedir.Do(req)
if err != nil {
fs.Debugf(nil, "Assuming path is a file as HEAD request could not be sent: %v", err)
return createFileResult()
}
if res.StatusCode == http.StatusNotFound {
fs.Debugf(nil, "Assuming path is a directory as HEAD response is it does not exist as a file (%s)", res.Status)
return createDirResult()
}
if res.StatusCode == http.StatusMovedPermanently ||
res.StatusCode == http.StatusFound ||
res.StatusCode == http.StatusSeeOther ||
res.StatusCode == http.StatusTemporaryRedirect ||
res.StatusCode == http.StatusPermanentRedirect {
redir := res.Header.Get("Location")
if redir != "" {
if redir[len(redir)-1] == '/' {
fs.Debugf(nil, "Assuming path is a directory as HEAD response is redirect (%s) to a path that ends with '/': %s", res.Status, redir)
return createDirResult()
}
fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) to a path that does not end with '/': %s", res.Status, redir)
return createFileResult()
}
fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) but no location header", res.Status)
return createFileResult()
}
if res.StatusCode < 200 || res.StatusCode > 299 {
// Example is 403 (http.StatusForbidden) for servers not allowing HEAD requests.
fs.Debugf(nil, "Assuming path is a file as HEAD response is an error (%s)", res.Status)
return createFileResult()
}
fs.Debugf(nil, "Assuming path is a file as HEAD response is success (%s)", res.Status)
return createFileResult()
}
// NewFs creates a new Fs object from the name and root. It connects to
// the host specified in the config file.
func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) {
@ -164,37 +241,9 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
client := fshttp.NewClient(ctx)
var isFile = false
if !strings.HasSuffix(u.String(), "/") {
// Make a client which doesn't follow redirects so the server
// doesn't redirect http://host/dir to http://host/dir/
noRedir := *client
noRedir.CheckRedirect = func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
}
// check to see if points to a file
req, err := http.NewRequestWithContext(ctx, "HEAD", u.String(), nil)
if err == nil {
addHeaders(req, opt)
res, err := noRedir.Do(req)
err = statusError(res, err)
if err == nil {
isFile = true
}
}
}
newRoot := u.String()
if isFile {
// Point to the parent if this is a file
newRoot, _ = path.Split(u.String())
} else {
if !strings.HasSuffix(newRoot, "/") {
newRoot += "/"
}
}
u, err = url.Parse(newRoot)
endpoint, isFile := getFsEndpoint(ctx, client, u.String(), opt)
fs.Debugf(nil, "Root: %s", endpoint)
u, err = url.Parse(endpoint)
if err != nil {
return nil, err
}
@ -212,12 +261,16 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
f.features = (&fs.Features{
CanHaveEmptyDirectories: true,
}).Fill(ctx, f)
if isFile {
// return an error with an fs which points to the parent
return f, fs.ErrorIsFile
}
if !strings.HasSuffix(f.endpointURL, "/") {
return nil, errors.New("internal error: url doesn't end with /")
}
return f, nil
}