http: improved recognition of url pointing to a single file - fixes #5929
This commit is contained in:
parent
1045344943
commit
a667e03fc9
3 changed files with 218 additions and 37 deletions
|
@ -73,8 +73,9 @@ directories.`,
|
||||||
Advanced: true,
|
Advanced: true,
|
||||||
}, {
|
}, {
|
||||||
Name: "no_head",
|
Name: "no_head",
|
||||||
Help: `Don't use HEAD requests to find file sizes in dir listing.
|
Help: `Don't use HEAD requests.
|
||||||
|
|
||||||
|
HEAD requests are mainly used to find file sizes in dir listing.
|
||||||
If your site is being very slow to load then you can try this option.
|
If your site is being very slow to load then you can try this option.
|
||||||
Normally rclone does a HEAD request for each potential file in a
|
Normally rclone does a HEAD request for each potential file in a
|
||||||
directory listing to:
|
directory listing to:
|
||||||
|
@ -134,6 +135,82 @@ func statusError(res *http.Response, err error) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getFsEndpoint decides if url is to be considered a file or directory,
|
||||||
|
// and returns a proper endpoint url to use for the fs.
|
||||||
|
func getFsEndpoint(ctx context.Context, client *http.Client, url string, opt *Options) (string, bool) {
|
||||||
|
// If url ends with '/' it is already a proper url always assumed to be a directory.
|
||||||
|
if url[len(url)-1] == '/' {
|
||||||
|
return url, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// If url does not end with '/' we send a HEAD request to decide
|
||||||
|
// if it is directory or file, and if directory appends the missing
|
||||||
|
// '/', or if file returns the directory url to parent instead.
|
||||||
|
createFileResult := func() (string, bool) {
|
||||||
|
fs.Debugf(nil, "If path is a directory you must add a trailing '/'")
|
||||||
|
parent, _ := path.Split(url)
|
||||||
|
return parent, true
|
||||||
|
}
|
||||||
|
createDirResult := func() (string, bool) {
|
||||||
|
fs.Debugf(nil, "To avoid the initial HEAD request add a trailing '/' to the path")
|
||||||
|
return url + "/", false
|
||||||
|
}
|
||||||
|
|
||||||
|
// If HEAD requests are not allowed we just have to assume it is a file.
|
||||||
|
if opt.NoHead {
|
||||||
|
fs.Debugf(nil, "Assuming path is a file as --http-no-head is set")
|
||||||
|
return createFileResult()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use a client which doesn't follow redirects so the server
|
||||||
|
// doesn't redirect http://host/dir to http://host/dir/
|
||||||
|
noRedir := *client
|
||||||
|
noRedir.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
||||||
|
return http.ErrUseLastResponse
|
||||||
|
}
|
||||||
|
req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil)
|
||||||
|
if err != nil {
|
||||||
|
fs.Debugf(nil, "Assuming path is a file as HEAD request could not be created: %v", err)
|
||||||
|
return createFileResult()
|
||||||
|
}
|
||||||
|
addHeaders(req, opt)
|
||||||
|
res, err := noRedir.Do(req)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
fs.Debugf(nil, "Assuming path is a file as HEAD request could not be sent: %v", err)
|
||||||
|
return createFileResult()
|
||||||
|
}
|
||||||
|
if res.StatusCode == http.StatusNotFound {
|
||||||
|
fs.Debugf(nil, "Assuming path is a directory as HEAD response is it does not exist as a file (%s)", res.Status)
|
||||||
|
return createDirResult()
|
||||||
|
}
|
||||||
|
if res.StatusCode == http.StatusMovedPermanently ||
|
||||||
|
res.StatusCode == http.StatusFound ||
|
||||||
|
res.StatusCode == http.StatusSeeOther ||
|
||||||
|
res.StatusCode == http.StatusTemporaryRedirect ||
|
||||||
|
res.StatusCode == http.StatusPermanentRedirect {
|
||||||
|
redir := res.Header.Get("Location")
|
||||||
|
if redir != "" {
|
||||||
|
if redir[len(redir)-1] == '/' {
|
||||||
|
fs.Debugf(nil, "Assuming path is a directory as HEAD response is redirect (%s) to a path that ends with '/': %s", res.Status, redir)
|
||||||
|
return createDirResult()
|
||||||
|
}
|
||||||
|
fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) to a path that does not end with '/': %s", res.Status, redir)
|
||||||
|
return createFileResult()
|
||||||
|
}
|
||||||
|
fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) but no location header", res.Status)
|
||||||
|
return createFileResult()
|
||||||
|
}
|
||||||
|
if res.StatusCode < 200 || res.StatusCode > 299 {
|
||||||
|
// Example is 403 (http.StatusForbidden) for servers not allowing HEAD requests.
|
||||||
|
fs.Debugf(nil, "Assuming path is a file as HEAD response is an error (%s)", res.Status)
|
||||||
|
return createFileResult()
|
||||||
|
}
|
||||||
|
|
||||||
|
fs.Debugf(nil, "Assuming path is a file as HEAD response is success (%s)", res.Status)
|
||||||
|
return createFileResult()
|
||||||
|
}
|
||||||
|
|
||||||
// NewFs creates a new Fs object from the name and root. It connects to
|
// NewFs creates a new Fs object from the name and root. It connects to
|
||||||
// the host specified in the config file.
|
// the host specified in the config file.
|
||||||
func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) {
|
func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) {
|
||||||
|
@ -164,37 +241,9 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
|
||||||
|
|
||||||
client := fshttp.NewClient(ctx)
|
client := fshttp.NewClient(ctx)
|
||||||
|
|
||||||
var isFile = false
|
endpoint, isFile := getFsEndpoint(ctx, client, u.String(), opt)
|
||||||
if !strings.HasSuffix(u.String(), "/") {
|
fs.Debugf(nil, "Root: %s", endpoint)
|
||||||
// Make a client which doesn't follow redirects so the server
|
u, err = url.Parse(endpoint)
|
||||||
// doesn't redirect http://host/dir to http://host/dir/
|
|
||||||
noRedir := *client
|
|
||||||
noRedir.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
|
||||||
return http.ErrUseLastResponse
|
|
||||||
}
|
|
||||||
// check to see if points to a file
|
|
||||||
req, err := http.NewRequestWithContext(ctx, "HEAD", u.String(), nil)
|
|
||||||
if err == nil {
|
|
||||||
addHeaders(req, opt)
|
|
||||||
res, err := noRedir.Do(req)
|
|
||||||
err = statusError(res, err)
|
|
||||||
if err == nil {
|
|
||||||
isFile = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
newRoot := u.String()
|
|
||||||
if isFile {
|
|
||||||
// Point to the parent if this is a file
|
|
||||||
newRoot, _ = path.Split(u.String())
|
|
||||||
} else {
|
|
||||||
if !strings.HasSuffix(newRoot, "/") {
|
|
||||||
newRoot += "/"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
u, err = url.Parse(newRoot)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -212,12 +261,16 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
|
||||||
f.features = (&fs.Features{
|
f.features = (&fs.Features{
|
||||||
CanHaveEmptyDirectories: true,
|
CanHaveEmptyDirectories: true,
|
||||||
}).Fill(ctx, f)
|
}).Fill(ctx, f)
|
||||||
|
|
||||||
if isFile {
|
if isFile {
|
||||||
|
// return an error with an fs which points to the parent
|
||||||
return f, fs.ErrorIsFile
|
return f, fs.ErrorIsFile
|
||||||
}
|
}
|
||||||
|
|
||||||
if !strings.HasSuffix(f.endpointURL, "/") {
|
if !strings.HasSuffix(f.endpointURL, "/") {
|
||||||
return nil, errors.New("internal error: url doesn't end with /")
|
return nil, errors.New("internal error: url doesn't end with /")
|
||||||
}
|
}
|
||||||
|
|
||||||
return f, nil
|
return f, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,8 +8,10 @@ import (
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
@ -374,3 +376,106 @@ func TestParseCaddy(t *testing.T) {
|
||||||
"v1.36-22-g06ea13a-ssh-agentβ/",
|
"v1.36-22-g06ea13a-ssh-agentβ/",
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFsNoSlashRoots(t *testing.T) {
|
||||||
|
// Test Fs with roots that does not end with '/', the logic that
|
||||||
|
// decides if url is to be considered a file or directory, based
|
||||||
|
// on result from a HEAD request.
|
||||||
|
|
||||||
|
// Handler for faking HEAD responses with different status codes
|
||||||
|
headCount := 0
|
||||||
|
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method == "HEAD" {
|
||||||
|
headCount++
|
||||||
|
responseCode, err := strconv.Atoi(path.Base(r.URL.String()))
|
||||||
|
require.NoError(t, err)
|
||||||
|
if strings.HasPrefix(r.URL.String(), "/redirect/") {
|
||||||
|
var redir string
|
||||||
|
if strings.HasPrefix(r.URL.String(), "/redirect/file/") {
|
||||||
|
redir = "/redirected"
|
||||||
|
} else if strings.HasPrefix(r.URL.String(), "/redirect/dir/") {
|
||||||
|
redir = "/redirected/"
|
||||||
|
} else {
|
||||||
|
require.Fail(t, "Redirect test requests must start with '/redirect/file/' or '/redirect/dir/'")
|
||||||
|
}
|
||||||
|
http.Redirect(w, r, redir, responseCode)
|
||||||
|
} else {
|
||||||
|
http.Error(w, http.StatusText(responseCode), responseCode)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// Make the test server
|
||||||
|
ts := httptest.NewServer(handler)
|
||||||
|
defer ts.Close()
|
||||||
|
|
||||||
|
// Configure the remote
|
||||||
|
configfile.Install()
|
||||||
|
m := configmap.Simple{
|
||||||
|
"type": "http",
|
||||||
|
"url": ts.URL,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test
|
||||||
|
for i, test := range []struct {
|
||||||
|
root string
|
||||||
|
isFile bool
|
||||||
|
}{
|
||||||
|
// 2xx success
|
||||||
|
{"parent/200", true},
|
||||||
|
{"parent/204", true},
|
||||||
|
|
||||||
|
// 3xx redirection Redirect status 301, 302, 303, 307, 308
|
||||||
|
{"redirect/file/301", true}, // Request is redirected to "/redirected"
|
||||||
|
{"redirect/dir/301", false}, // Request is redirected to "/redirected/"
|
||||||
|
{"redirect/file/302", true}, // Request is redirected to "/redirected"
|
||||||
|
{"redirect/dir/302", false}, // Request is redirected to "/redirected/"
|
||||||
|
{"redirect/file/303", true}, // Request is redirected to "/redirected"
|
||||||
|
{"redirect/dir/303", false}, // Request is redirected to "/redirected/"
|
||||||
|
|
||||||
|
{"redirect/file/304", true}, // Not really a redirect, handled like 4xx errors (below)
|
||||||
|
{"redirect/file/305", true}, // Not really a redirect, handled like 4xx errors (below)
|
||||||
|
{"redirect/file/306", true}, // Not really a redirect, handled like 4xx errors (below)
|
||||||
|
|
||||||
|
{"redirect/file/307", true}, // Request is redirected to "/redirected"
|
||||||
|
{"redirect/dir/307", false}, // Request is redirected to "/redirected/"
|
||||||
|
{"redirect/file/308", true}, // Request is redirected to "/redirected"
|
||||||
|
{"redirect/dir/308", false}, // Request is redirected to "/redirected/"
|
||||||
|
|
||||||
|
// 4xx client errors
|
||||||
|
{"parent/403", true}, // Forbidden status (head request blocked)
|
||||||
|
{"parent/404", false}, // Not found status
|
||||||
|
} {
|
||||||
|
for _, noHead := range []bool{false, true} {
|
||||||
|
var isFile bool
|
||||||
|
if noHead {
|
||||||
|
m.Set("no_head", "true")
|
||||||
|
isFile = true
|
||||||
|
} else {
|
||||||
|
m.Set("no_head", "false")
|
||||||
|
isFile = test.isFile
|
||||||
|
}
|
||||||
|
headCount = 0
|
||||||
|
f, err := NewFs(context.Background(), remoteName, test.root, m)
|
||||||
|
if noHead {
|
||||||
|
assert.Equal(t, 0, headCount)
|
||||||
|
} else {
|
||||||
|
assert.Equal(t, 1, headCount)
|
||||||
|
}
|
||||||
|
if isFile {
|
||||||
|
assert.ErrorIs(t, err, fs.ErrorIsFile)
|
||||||
|
} else {
|
||||||
|
assert.NoError(t, err)
|
||||||
|
}
|
||||||
|
var endpoint string
|
||||||
|
if isFile {
|
||||||
|
parent, _ := path.Split(test.root)
|
||||||
|
endpoint = "/" + parent
|
||||||
|
} else {
|
||||||
|
endpoint = "/" + test.root + "/"
|
||||||
|
}
|
||||||
|
what := fmt.Sprintf("i=%d, root=%q, isFile=%v, noHead=%v", i, test.root, isFile, noHead)
|
||||||
|
assert.Equal(t, ts.URL+endpoint, f.String(), what)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -12,7 +12,26 @@ webservers such as Apache/Nginx/Caddy and will likely work with file
|
||||||
listings from most web servers. (If it doesn't then please file an
|
listings from most web servers. (If it doesn't then please file an
|
||||||
issue, or send a pull request!)
|
issue, or send a pull request!)
|
||||||
|
|
||||||
Paths are specified as `remote:` or `remote:path/to/dir`.
|
Paths are specified as `remote:` or `remote:path`.
|
||||||
|
|
||||||
|
The `remote:` represents the configured [url](#http-url), and any path following
|
||||||
|
it will be resolved relative to this url, according to the URL standard. This
|
||||||
|
means with remote url `https://beta.rclone.org/branch` and path `fix`, the
|
||||||
|
resolved URL will be `https://beta.rclone.org/branch/fix`, while with path
|
||||||
|
`/fix` the resolved URL will be `https://beta.rclone.org/fix` as the absolute
|
||||||
|
path is resolved from the root of the domain.
|
||||||
|
|
||||||
|
If the path following the `remote:` ends with `/` it will be assumed to point
|
||||||
|
to a directory. If the path does not end with `/`, then a HEAD request is sent
|
||||||
|
and the response used to decide if it is treated as a file or a directory
|
||||||
|
(run with `-vv` to see details). When [--http-no-head](#http-no-head) is
|
||||||
|
specified, a path without ending `/` is always assumed to be a file. If rclone
|
||||||
|
incorrectly assumes the path is a file, the solution is to specify the path with
|
||||||
|
ending `/`. When you know the path is a directory, ending it with `/` is always
|
||||||
|
better as it avoids the initial HEAD request.
|
||||||
|
|
||||||
|
To just download a single file it is easier to use
|
||||||
|
[copyurl](/commands/rclone_copyurl/).
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
|
@ -81,25 +100,29 @@ Sync the remote `directory` to `/home/local/directory`, deleting any excess file
|
||||||
|
|
||||||
rclone sync -i remote:directory /home/local/directory
|
rclone sync -i remote:directory /home/local/directory
|
||||||
|
|
||||||
### Read only ###
|
### Read only
|
||||||
|
|
||||||
This remote is read only - you can't upload files to an HTTP server.
|
This remote is read only - you can't upload files to an HTTP server.
|
||||||
|
|
||||||
### Modified time ###
|
### Modified time
|
||||||
|
|
||||||
Most HTTP servers store time accurate to 1 second.
|
Most HTTP servers store time accurate to 1 second.
|
||||||
|
|
||||||
### Checksum ###
|
### Checksum
|
||||||
|
|
||||||
No checksums are stored.
|
No checksums are stored.
|
||||||
|
|
||||||
### Usage without a config file ###
|
### Usage without a config file
|
||||||
|
|
||||||
Since the http remote only has one config parameter it is easy to use
|
Since the http remote only has one config parameter it is easy to use
|
||||||
without a config file:
|
without a config file:
|
||||||
|
|
||||||
rclone lsd --http-url https://beta.rclone.org :http:
|
rclone lsd --http-url https://beta.rclone.org :http:
|
||||||
|
|
||||||
|
or:
|
||||||
|
|
||||||
|
rclone lsd :http,url='https://beta.rclone.org':
|
||||||
|
|
||||||
{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/http/http.go then run make backenddocs" >}}
|
{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/http/http.go then run make backenddocs" >}}
|
||||||
### Standard options
|
### Standard options
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue