// Package http provides a filesystem interface using golang.org/net/http // // It treads HTML pages served from the endpoint as directory // listings, and includes any links found as files. // +build !plan9 package http import ( "fmt" "io" "net/http" "net/url" "os" "path" "strconv" "strings" "sync" "time" "golang.org/x/net/html" "github.com/ncw/rclone/fs" "github.com/pkg/errors" ) func init() { fsi := &fs.RegInfo{ Name: "http", Description: "http Connection", NewFs: NewFs, Options: []fs.Option{{ Name: "endpoint", Help: "http host to connect to", Optional: false, Examples: []fs.OptionExample{{ Value: "example.com", Help: "Connect to example.com", }}, }}, } fs.Register(fsi) } // Fs stores the interface to the remote HTTP files type Fs struct { name string root string features *fs.Features // optional features endpoint *url.URL httpClient *http.Client } // Object is a remote object that has been stat'd (so it exists, but is not necessarily open for reading) type Object struct { fs *Fs remote string info os.FileInfo } // ObjectReader holds the File interface to a remote http file opened for reading type ObjectReader struct { object *Object httpFile io.ReadCloser } func urlJoin(u *url.URL, paths ...string) string { r := u for _, p := range paths { if p == "/" { continue } rel, _ := url.Parse(p) r = r.ResolveReference(rel) } return r.String() } // NewFs creates a new Fs object from the name and root. It connects to // the host specified in the config file. func NewFs(name, root string) (fs.Fs, error) { endpoint := fs.ConfigFileGet(name, "endpoint") u, err := url.Parse(endpoint) if err != nil { return nil, err } if !strings.HasSuffix(root, "/") && root != "" { root += "/" } client := fs.Config.Client() _, err = client.Head(urlJoin(u, root)) if err != nil { return nil, errors.Wrap(err, "couldn't connect http") } f := &Fs{ name: name, root: root, httpClient: client, endpoint: u, } f.features = (&fs.Features{}).Fill(f) return f, nil } // Name returns the configured name of the file system func (f *Fs) Name() string { return f.name } // Root returns the root for the filesystem func (f *Fs) Root() string { return f.root } // String returns the URL for the filesystem func (f *Fs) String() string { return urlJoin(f.endpoint, f.root) } // Features returns the optional features of this Fs func (f *Fs) Features() *fs.Features { return f.features } // Precision is the remote http file system's modtime precision, which we have no way of knowing. We estimate at 1s func (f *Fs) Precision() time.Duration { return time.Second } // NewObject creates a new remote http file object func (f *Fs) NewObject(remote string) (fs.Object, error) { o := &Object{ fs: f, remote: remote, } err := o.stat() if err != nil { return nil, errors.Wrap(err, "Stat failed") } return o, nil } // dirExists returns true,nil if the directory exists, false, nil if // it doesn't or false, err func (f *Fs) dirExists(dir string) (bool, error) { res, err := f.httpClient.Head(urlJoin(f.endpoint, dir)) if err != nil { return false, err } if res.StatusCode == http.StatusOK { return true, nil } return false, nil } type entry struct { name string url string size int64 mode os.FileMode mtime int64 } func (e *entry) Name() string { return e.name } func (e *entry) Size() int64 { return e.size } func (e *entry) Mode() os.FileMode { return os.FileMode(e.mode) } func (e *entry) ModTime() time.Time { return time.Unix(e.mtime, 0) } func (e *entry) IsDir() bool { return e.mode&os.ModeDir != 0 } func (e *entry) Sys() interface{} { return nil } func parseInt64(s string) int64 { n, e := strconv.ParseInt(s, 10, 64) if e != nil { return 0 } return n } func parseBool(s string) bool { b, e := strconv.ParseBool(s) if e != nil { return false } return b } func prepareTimeString(ts string) string { return strings.Trim(strings.Join(strings.SplitN(strings.Trim(ts, "\t "), " ", 3)[0:2], " "), "\r\n\t ") } func parseTime(n *html.Node) (t time.Time) { if ts := prepareTimeString(n.Data); ts != "" { t, _ = time.Parse("2-Jan-2006 15:04", ts) } return t } func (f *Fs) readDir(path string) ([]*entry, error) { entries := make([]*entry, 0) res, err := f.httpClient.Get(urlJoin(f.endpoint, path)) if err != nil { return nil, err } if res.Body == nil || res.StatusCode != http.StatusOK { //return nil, errors.Errorf("directory listing failed with error: % (%d)", res.Status, res.StatusCode) return nil, nil } defer fs.CheckClose(res.Body, &err) switch strings.SplitN(res.Header.Get("Content-Type"), ";", 2)[0] { case "text/html": doc, err := html.Parse(res.Body) if err != nil { return nil, err } var walk func(*html.Node) walk = func(n *html.Node) { if n.Type == html.ElementNode && n.Data == "a" { for _, a := range n.Attr { if a.Key == "href" { name, err := url.QueryUnescape(a.Val) if err != nil { continue } if name == "../" || name == "./" { break } e := &entry{ name: strings.TrimRight(name, "/"), url: name, } if a.Val[len(a.Val)-1] == '/' { e.mode = os.FileMode(0555) | os.ModeDir } else { e.mode = os.FileMode(0444) } entries = append(entries, e) break } } } for c := n.FirstChild; c != nil; c = c.NextSibling { walk(c) } } walk(doc) } return entries, nil } func (f *Fs) list(out fs.ListOpts, dir string, level int, wg *sync.WaitGroup, tokens chan struct{}) { defer wg.Done() // take a token <-tokens // return it when done defer func() { tokens <- struct{}{} }() httpDir := path.Join(f.root, dir) if !strings.HasSuffix(dir, "/") { httpDir += "/" } infos, err := f.readDir(httpDir) if err != nil { err = errors.Wrapf(err, "error listing %q", dir) fs.Errorf(f, "Listing failed: %v", err) out.SetError(err) return } for _, info := range infos { remote := "" if dir != "" { remote = dir + "/" + info.Name() } else { remote = info.Name() } if info.IsDir() { if out.IncludeDirectory(remote) { dir := &fs.Dir{ Name: remote, When: info.ModTime(), Bytes: 0, Count: 0, } out.AddDir(dir) if level < out.Level() { wg.Add(1) go f.list(out, remote, level+1, wg, tokens) } } } else { file := &Object{ fs: f, remote: remote, info: info, } if err = file.stat(); err != nil { continue } out.Add(file) } } } // List the files and directories starting at