http: improved recognition of url pointing to a single file - fixes #5929
This commit is contained in:
parent
1045344943
commit
a667e03fc9
3 changed files with 218 additions and 37 deletions
|
@ -73,8 +73,9 @@ directories.`,
|
|||
Advanced: true,
|
||||
}, {
|
||||
Name: "no_head",
|
||||
Help: `Don't use HEAD requests to find file sizes in dir listing.
|
||||
Help: `Don't use HEAD requests.
|
||||
|
||||
HEAD requests are mainly used to find file sizes in dir listing.
|
||||
If your site is being very slow to load then you can try this option.
|
||||
Normally rclone does a HEAD request for each potential file in a
|
||||
directory listing to:
|
||||
|
@ -134,6 +135,82 @@ func statusError(res *http.Response, err error) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// getFsEndpoint decides if url is to be considered a file or directory,
|
||||
// and returns a proper endpoint url to use for the fs.
|
||||
func getFsEndpoint(ctx context.Context, client *http.Client, url string, opt *Options) (string, bool) {
|
||||
// If url ends with '/' it is already a proper url always assumed to be a directory.
|
||||
if url[len(url)-1] == '/' {
|
||||
return url, false
|
||||
}
|
||||
|
||||
// If url does not end with '/' we send a HEAD request to decide
|
||||
// if it is directory or file, and if directory appends the missing
|
||||
// '/', or if file returns the directory url to parent instead.
|
||||
createFileResult := func() (string, bool) {
|
||||
fs.Debugf(nil, "If path is a directory you must add a trailing '/'")
|
||||
parent, _ := path.Split(url)
|
||||
return parent, true
|
||||
}
|
||||
createDirResult := func() (string, bool) {
|
||||
fs.Debugf(nil, "To avoid the initial HEAD request add a trailing '/' to the path")
|
||||
return url + "/", false
|
||||
}
|
||||
|
||||
// If HEAD requests are not allowed we just have to assume it is a file.
|
||||
if opt.NoHead {
|
||||
fs.Debugf(nil, "Assuming path is a file as --http-no-head is set")
|
||||
return createFileResult()
|
||||
}
|
||||
|
||||
// Use a client which doesn't follow redirects so the server
|
||||
// doesn't redirect http://host/dir to http://host/dir/
|
||||
noRedir := *client
|
||||
noRedir.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
||||
return http.ErrUseLastResponse
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil)
|
||||
if err != nil {
|
||||
fs.Debugf(nil, "Assuming path is a file as HEAD request could not be created: %v", err)
|
||||
return createFileResult()
|
||||
}
|
||||
addHeaders(req, opt)
|
||||
res, err := noRedir.Do(req)
|
||||
|
||||
if err != nil {
|
||||
fs.Debugf(nil, "Assuming path is a file as HEAD request could not be sent: %v", err)
|
||||
return createFileResult()
|
||||
}
|
||||
if res.StatusCode == http.StatusNotFound {
|
||||
fs.Debugf(nil, "Assuming path is a directory as HEAD response is it does not exist as a file (%s)", res.Status)
|
||||
return createDirResult()
|
||||
}
|
||||
if res.StatusCode == http.StatusMovedPermanently ||
|
||||
res.StatusCode == http.StatusFound ||
|
||||
res.StatusCode == http.StatusSeeOther ||
|
||||
res.StatusCode == http.StatusTemporaryRedirect ||
|
||||
res.StatusCode == http.StatusPermanentRedirect {
|
||||
redir := res.Header.Get("Location")
|
||||
if redir != "" {
|
||||
if redir[len(redir)-1] == '/' {
|
||||
fs.Debugf(nil, "Assuming path is a directory as HEAD response is redirect (%s) to a path that ends with '/': %s", res.Status, redir)
|
||||
return createDirResult()
|
||||
}
|
||||
fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) to a path that does not end with '/': %s", res.Status, redir)
|
||||
return createFileResult()
|
||||
}
|
||||
fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) but no location header", res.Status)
|
||||
return createFileResult()
|
||||
}
|
||||
if res.StatusCode < 200 || res.StatusCode > 299 {
|
||||
// Example is 403 (http.StatusForbidden) for servers not allowing HEAD requests.
|
||||
fs.Debugf(nil, "Assuming path is a file as HEAD response is an error (%s)", res.Status)
|
||||
return createFileResult()
|
||||
}
|
||||
|
||||
fs.Debugf(nil, "Assuming path is a file as HEAD response is success (%s)", res.Status)
|
||||
return createFileResult()
|
||||
}
|
||||
|
||||
// NewFs creates a new Fs object from the name and root. It connects to
|
||||
// the host specified in the config file.
|
||||
func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) {
|
||||
|
@ -164,37 +241,9 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
|
|||
|
||||
client := fshttp.NewClient(ctx)
|
||||
|
||||
var isFile = false
|
||||
if !strings.HasSuffix(u.String(), "/") {
|
||||
// Make a client which doesn't follow redirects so the server
|
||||
// doesn't redirect http://host/dir to http://host/dir/
|
||||
noRedir := *client
|
||||
noRedir.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
||||
return http.ErrUseLastResponse
|
||||
}
|
||||
// check to see if points to a file
|
||||
req, err := http.NewRequestWithContext(ctx, "HEAD", u.String(), nil)
|
||||
if err == nil {
|
||||
addHeaders(req, opt)
|
||||
res, err := noRedir.Do(req)
|
||||
err = statusError(res, err)
|
||||
if err == nil {
|
||||
isFile = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
newRoot := u.String()
|
||||
if isFile {
|
||||
// Point to the parent if this is a file
|
||||
newRoot, _ = path.Split(u.String())
|
||||
} else {
|
||||
if !strings.HasSuffix(newRoot, "/") {
|
||||
newRoot += "/"
|
||||
}
|
||||
}
|
||||
|
||||
u, err = url.Parse(newRoot)
|
||||
endpoint, isFile := getFsEndpoint(ctx, client, u.String(), opt)
|
||||
fs.Debugf(nil, "Root: %s", endpoint)
|
||||
u, err = url.Parse(endpoint)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -212,12 +261,16 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
|
|||
f.features = (&fs.Features{
|
||||
CanHaveEmptyDirectories: true,
|
||||
}).Fill(ctx, f)
|
||||
|
||||
if isFile {
|
||||
// return an error with an fs which points to the parent
|
||||
return f, fs.ErrorIsFile
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(f.endpointURL, "/") {
|
||||
return nil, errors.New("internal error: url doesn't end with /")
|
||||
}
|
||||
|
||||
return f, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -8,8 +8,10 @@ import (
|
|||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
@ -374,3 +376,106 @@ func TestParseCaddy(t *testing.T) {
|
|||
"v1.36-22-g06ea13a-ssh-agentβ/",
|
||||
})
|
||||
}
|
||||
|
||||
func TestFsNoSlashRoots(t *testing.T) {
|
||||
// Test Fs with roots that does not end with '/', the logic that
|
||||
// decides if url is to be considered a file or directory, based
|
||||
// on result from a HEAD request.
|
||||
|
||||
// Handler for faking HEAD responses with different status codes
|
||||
headCount := 0
|
||||
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method == "HEAD" {
|
||||
headCount++
|
||||
responseCode, err := strconv.Atoi(path.Base(r.URL.String()))
|
||||
require.NoError(t, err)
|
||||
if strings.HasPrefix(r.URL.String(), "/redirect/") {
|
||||
var redir string
|
||||
if strings.HasPrefix(r.URL.String(), "/redirect/file/") {
|
||||
redir = "/redirected"
|
||||
} else if strings.HasPrefix(r.URL.String(), "/redirect/dir/") {
|
||||
redir = "/redirected/"
|
||||
} else {
|
||||
require.Fail(t, "Redirect test requests must start with '/redirect/file/' or '/redirect/dir/'")
|
||||
}
|
||||
http.Redirect(w, r, redir, responseCode)
|
||||
} else {
|
||||
http.Error(w, http.StatusText(responseCode), responseCode)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
// Make the test server
|
||||
ts := httptest.NewServer(handler)
|
||||
defer ts.Close()
|
||||
|
||||
// Configure the remote
|
||||
configfile.Install()
|
||||
m := configmap.Simple{
|
||||
"type": "http",
|
||||
"url": ts.URL,
|
||||
}
|
||||
|
||||
// Test
|
||||
for i, test := range []struct {
|
||||
root string
|
||||
isFile bool
|
||||
}{
|
||||
// 2xx success
|
||||
{"parent/200", true},
|
||||
{"parent/204", true},
|
||||
|
||||
// 3xx redirection Redirect status 301, 302, 303, 307, 308
|
||||
{"redirect/file/301", true}, // Request is redirected to "/redirected"
|
||||
{"redirect/dir/301", false}, // Request is redirected to "/redirected/"
|
||||
{"redirect/file/302", true}, // Request is redirected to "/redirected"
|
||||
{"redirect/dir/302", false}, // Request is redirected to "/redirected/"
|
||||
{"redirect/file/303", true}, // Request is redirected to "/redirected"
|
||||
{"redirect/dir/303", false}, // Request is redirected to "/redirected/"
|
||||
|
||||
{"redirect/file/304", true}, // Not really a redirect, handled like 4xx errors (below)
|
||||
{"redirect/file/305", true}, // Not really a redirect, handled like 4xx errors (below)
|
||||
{"redirect/file/306", true}, // Not really a redirect, handled like 4xx errors (below)
|
||||
|
||||
{"redirect/file/307", true}, // Request is redirected to "/redirected"
|
||||
{"redirect/dir/307", false}, // Request is redirected to "/redirected/"
|
||||
{"redirect/file/308", true}, // Request is redirected to "/redirected"
|
||||
{"redirect/dir/308", false}, // Request is redirected to "/redirected/"
|
||||
|
||||
// 4xx client errors
|
||||
{"parent/403", true}, // Forbidden status (head request blocked)
|
||||
{"parent/404", false}, // Not found status
|
||||
} {
|
||||
for _, noHead := range []bool{false, true} {
|
||||
var isFile bool
|
||||
if noHead {
|
||||
m.Set("no_head", "true")
|
||||
isFile = true
|
||||
} else {
|
||||
m.Set("no_head", "false")
|
||||
isFile = test.isFile
|
||||
}
|
||||
headCount = 0
|
||||
f, err := NewFs(context.Background(), remoteName, test.root, m)
|
||||
if noHead {
|
||||
assert.Equal(t, 0, headCount)
|
||||
} else {
|
||||
assert.Equal(t, 1, headCount)
|
||||
}
|
||||
if isFile {
|
||||
assert.ErrorIs(t, err, fs.ErrorIsFile)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
var endpoint string
|
||||
if isFile {
|
||||
parent, _ := path.Split(test.root)
|
||||
endpoint = "/" + parent
|
||||
} else {
|
||||
endpoint = "/" + test.root + "/"
|
||||
}
|
||||
what := fmt.Sprintf("i=%d, root=%q, isFile=%v, noHead=%v", i, test.root, isFile, noHead)
|
||||
assert.Equal(t, ts.URL+endpoint, f.String(), what)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,7 +12,26 @@ webservers such as Apache/Nginx/Caddy and will likely work with file
|
|||
listings from most web servers. (If it doesn't then please file an
|
||||
issue, or send a pull request!)
|
||||
|
||||
Paths are specified as `remote:` or `remote:path/to/dir`.
|
||||
Paths are specified as `remote:` or `remote:path`.
|
||||
|
||||
The `remote:` represents the configured [url](#http-url), and any path following
|
||||
it will be resolved relative to this url, according to the URL standard. This
|
||||
means with remote url `https://beta.rclone.org/branch` and path `fix`, the
|
||||
resolved URL will be `https://beta.rclone.org/branch/fix`, while with path
|
||||
`/fix` the resolved URL will be `https://beta.rclone.org/fix` as the absolute
|
||||
path is resolved from the root of the domain.
|
||||
|
||||
If the path following the `remote:` ends with `/` it will be assumed to point
|
||||
to a directory. If the path does not end with `/`, then a HEAD request is sent
|
||||
and the response used to decide if it it is treated as a file or a directory
|
||||
(run with `-vv` to see details). When [--http-no-head](#http-no-head) is
|
||||
specified, a path without ending `/` is always assumed to be a file. If rclone
|
||||
incorrectly assumes the path is a file, the solution is to specify the path with
|
||||
ending `/`. When you know the path is a directory, ending it with `/` is always
|
||||
better as it avoids the initial HEAD request.
|
||||
|
||||
To just download a single file it is easier to use
|
||||
[copyurl](/commands/rclone_copyurl/).
|
||||
|
||||
## Configuration
|
||||
|
||||
|
@ -81,25 +100,29 @@ Sync the remote `directory` to `/home/local/directory`, deleting any excess file
|
|||
|
||||
rclone sync -i remote:directory /home/local/directory
|
||||
|
||||
### Read only ###
|
||||
### Read only
|
||||
|
||||
This remote is read only - you can't upload files to an HTTP server.
|
||||
|
||||
### Modified time ###
|
||||
### Modified time
|
||||
|
||||
Most HTTP servers store time accurate to 1 second.
|
||||
|
||||
### Checksum ###
|
||||
### Checksum
|
||||
|
||||
No checksums are stored.
|
||||
|
||||
### Usage without a config file ###
|
||||
### Usage without a config file
|
||||
|
||||
Since the http remote only has one config parameter it is easy to use
|
||||
without a config file:
|
||||
|
||||
rclone lsd --http-url https://beta.rclone.org :http:
|
||||
|
||||
or:
|
||||
|
||||
rclone lsd :http,url='https://beta.rclone.org':
|
||||
|
||||
{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/http/http.go then run make backenddocs" >}}
|
||||
### Standard options
|
||||
|
||||
|
|
Loading…
Reference in a new issue