http: add --http-no-slash for websites with directories with no slashes #3053

See: https://forum.rclone.org/t/is-there-a-way-to-log-into-an-htpp-server/8484
This commit is contained in:
Nick Craig-Wood 2019-02-08 13:58:47 +00:00
parent 54a2e99d97
commit e62bbf761b
3 changed files with 57 additions and 10 deletions

View file

@ -6,6 +6,7 @@ package http
import (
"io"
"mime"
"net/http"
"net/url"
"path"
@ -44,6 +45,22 @@ func init() {
Value: "https://user:pass@example.com",
Help: "Connect to example.com using a username and password",
}},
}, {
Name: "no_slash",
Help: `Set this if the site doesn't end directories with /
Use this if your target website does not use / on the end of
directories.
A / on the end of a path is how rclone normally tells the difference
between files and directories. If this flag is set, then rclone will
treat all files with Content-Type: text/html as directories and read
URLs from them rather than downloading them.
Note that this may cause rclone to confuse genuine HTML files with
directories.`,
Default: false,
Advanced: true,
}},
}
fs.Register(fsi)
@ -52,6 +69,7 @@ func init() {
// Options defines the configuration for this backend
type Options struct {
Endpoint string `config:"url"`
NoSlash bool `config:"no_slash"`
}
// Fs stores the interface to the remote HTTP files
@ -359,11 +377,16 @@ func (f *Fs) List(dir string) (entries fs.DirEntries, err error) {
fs: f,
remote: remote,
}
if err = file.stat(); err != nil {
switch err = file.stat(); err {
case nil:
entries = append(entries, file)
case fs.ErrorNotAFile:
// ...found a directory not a file
dir := fs.NewDir(remote, timeUnset)
entries = append(entries, dir)
default:
fs.Debugf(remote, "skipping because of error: %v", err)
continue
}
entries = append(entries, file)
}
}
return entries, nil
@ -439,6 +462,16 @@ func (o *Object) stat() error {
o.size = parseInt64(res.Header.Get("Content-Length"), -1)
o.modTime = t
o.contentType = res.Header.Get("Content-Type")
// If NoSlash is set then check ContentType to see if it is a directory
if o.fs.opt.NoSlash {
mediaType, _, err := mime.ParseMediaType(o.contentType)
if err != nil {
return errors.Wrapf(err, "failed to parse Content-Type: %q", o.contentType)
}
if mediaType == "text/html" {
return fs.ErrorNotAFile
}
}
return nil
}

View file

@ -65,7 +65,7 @@ func prepare(t *testing.T) (fs.Fs, func()) {
return f, tidy
}
func testListRoot(t *testing.T, f fs.Fs) {
func testListRoot(t *testing.T, f fs.Fs, noSlash bool) {
entries, err := f.List("")
require.NoError(t, err)
@ -93,15 +93,29 @@ func testListRoot(t *testing.T, f fs.Fs) {
e = entries[3]
assert.Equal(t, "two.html", e.Remote())
assert.Equal(t, int64(7), e.Size())
_, ok = e.(*Object)
assert.True(t, ok)
if noSlash {
assert.Equal(t, int64(-1), e.Size())
_, ok = e.(fs.Directory)
assert.True(t, ok)
} else {
assert.Equal(t, int64(41), e.Size())
_, ok = e.(*Object)
assert.True(t, ok)
}
}
func TestListRoot(t *testing.T) {
f, tidy := prepare(t)
defer tidy()
testListRoot(t, f)
testListRoot(t, f, false)
}
func TestListRootNoSlash(t *testing.T) {
f, tidy := prepare(t)
f.(*Fs).opt.NoSlash = true
defer tidy()
testListRoot(t, f, true)
}
func TestListSubDir(t *testing.T) {
@ -194,7 +208,7 @@ func TestIsAFileRoot(t *testing.T) {
f, err := NewFs(remoteName, "one%.txt", m)
assert.Equal(t, err, fs.ErrorIsFile)
testListRoot(t, f)
testListRoot(t, f, false)
}
func TestIsAFileSubDir(t *testing.T) {

View file

@ -1 +1 @@
potato
<a href="two.html/file.txt">file.txt</a>