HDFS (Hadoop Distributed File System) implementation - #42
This includes an HDFS docker image to use with the integration tests. Co-authored-by: Ivan Andreev <ivandeex@gmail.com> Co-authored-by: Nick Craig-Wood <nick@craig-wood.com>
This commit is contained in:
parent
768e4c4735
commit
71edc75ca6
26 changed files with 906 additions and 0 deletions
257
backend/hdfs/fs.go
Normal file
257
backend/hdfs/fs.go
Normal file
|
@ -0,0 +1,257 @@
|
|||
// +build !plan9
|
||||
|
||||
package hdfs
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"time"
|
||||
|
||||
"github.com/colinmarc/hdfs/v2"
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/config/configmap"
|
||||
"github.com/rclone/rclone/fs/config/configstruct"
|
||||
"github.com/rclone/rclone/fs/hash"
|
||||
)
|
||||
|
||||
// Fs represents a HDFS server
|
||||
type Fs struct {
|
||||
name string
|
||||
root string
|
||||
features *fs.Features // optional features
|
||||
opt Options // options for this backend
|
||||
ci *fs.ConfigInfo // global config
|
||||
client *hdfs.Client
|
||||
}
|
||||
|
||||
// NewFs constructs an Fs from the path
|
||||
func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) {
|
||||
opt := new(Options)
|
||||
err := configstruct.Set(m, opt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client, err := hdfs.NewClient(hdfs.ClientOptions{
|
||||
Addresses: []string{opt.Namenode},
|
||||
User: opt.Username,
|
||||
UseDatanodeHostname: false,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
f := &Fs{
|
||||
name: name,
|
||||
root: root,
|
||||
opt: *opt,
|
||||
ci: fs.GetConfig(ctx),
|
||||
client: client,
|
||||
}
|
||||
|
||||
f.features = (&fs.Features{
|
||||
CanHaveEmptyDirectories: true,
|
||||
}).Fill(ctx, f)
|
||||
|
||||
info, err := f.client.Stat(f.realpath(""))
|
||||
if err == nil && !info.IsDir() {
|
||||
f.root = path.Dir(f.root)
|
||||
return f, fs.ErrorIsFile
|
||||
}
|
||||
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// Name of this fs
|
||||
func (f *Fs) Name() string {
|
||||
return f.name
|
||||
}
|
||||
|
||||
// Root of the remote (as passed into NewFs)
|
||||
func (f *Fs) Root() string {
|
||||
return f.root
|
||||
}
|
||||
|
||||
// String returns a description of the FS
|
||||
func (f *Fs) String() string {
|
||||
return fmt.Sprintf("hdfs://%s", f.opt.Namenode)
|
||||
}
|
||||
|
||||
// Features returns the optional features of this Fs
|
||||
func (f *Fs) Features() *fs.Features {
|
||||
return f.features
|
||||
}
|
||||
|
||||
// Precision return the precision of this Fs
|
||||
func (f *Fs) Precision() time.Duration {
|
||||
return time.Second
|
||||
}
|
||||
|
||||
// Hashes are not supported
|
||||
func (f *Fs) Hashes() hash.Set {
|
||||
return hash.Set(hash.None)
|
||||
}
|
||||
|
||||
// NewObject finds file at remote or return fs.ErrorObjectNotFound
|
||||
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
|
||||
realpath := f.realpath(remote)
|
||||
fs.Debugf(f, "new [%s]", realpath)
|
||||
|
||||
info, err := f.ensureFile(realpath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Object{
|
||||
fs: f,
|
||||
remote: remote,
|
||||
size: info.Size(),
|
||||
modTime: info.ModTime(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// List the objects and directories in dir into entries.
|
||||
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
|
||||
realpath := f.realpath(dir)
|
||||
fs.Debugf(f, "list [%s]", realpath)
|
||||
|
||||
err = f.ensureDirectory(realpath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
list, err := f.client.ReadDir(realpath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, x := range list {
|
||||
stdName := f.opt.Enc.ToStandardName(x.Name())
|
||||
remote := path.Join(dir, stdName)
|
||||
if x.IsDir() {
|
||||
entries = append(entries, fs.NewDir(remote, x.ModTime()))
|
||||
} else {
|
||||
entries = append(entries, &Object{
|
||||
fs: f,
|
||||
remote: remote,
|
||||
size: x.Size(),
|
||||
modTime: x.ModTime()})
|
||||
}
|
||||
}
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
// Put the object
|
||||
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
|
||||
o := &Object{
|
||||
fs: f,
|
||||
remote: src.Remote(),
|
||||
}
|
||||
err := o.Update(ctx, in, src, options...)
|
||||
return o, err
|
||||
}
|
||||
|
||||
// PutStream uploads to the remote path with the modTime given of indeterminate size
|
||||
func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
|
||||
return f.Put(ctx, in, src, options...)
|
||||
}
|
||||
|
||||
// Mkdir makes a directory
|
||||
func (f *Fs) Mkdir(ctx context.Context, dir string) error {
|
||||
fs.Debugf(f, "mkdir [%s]", f.realpath(dir))
|
||||
return f.client.MkdirAll(f.realpath(dir), 0755)
|
||||
}
|
||||
|
||||
// Rmdir deletes the directory
|
||||
func (f *Fs) Rmdir(ctx context.Context, dir string) error {
|
||||
realpath := f.realpath(dir)
|
||||
fs.Debugf(f, "rmdir [%s]", realpath)
|
||||
|
||||
err := f.ensureDirectory(realpath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// do not remove empty directory
|
||||
list, err := f.client.ReadDir(realpath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(list) > 0 {
|
||||
return fs.ErrorDirectoryNotEmpty
|
||||
}
|
||||
|
||||
return f.client.Remove(realpath)
|
||||
}
|
||||
|
||||
// Purge deletes all the files in the directory
|
||||
func (f *Fs) Purge(ctx context.Context, dir string) error {
|
||||
realpath := f.realpath(dir)
|
||||
fs.Debugf(f, "purge [%s]", realpath)
|
||||
|
||||
err := f.ensureDirectory(realpath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return f.client.RemoveAll(realpath)
|
||||
}
|
||||
|
||||
// About gets quota information from the Fs
|
||||
func (f *Fs) About(ctx context.Context) (*fs.Usage, error) {
|
||||
info, err := f.client.StatFs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &fs.Usage{
|
||||
Total: fs.NewUsageValue(int64(info.Capacity)),
|
||||
Used: fs.NewUsageValue(int64(info.Used)),
|
||||
Free: fs.NewUsageValue(int64(info.Remaining)),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (f *Fs) ensureDirectory(realpath string) error {
|
||||
info, err := f.client.Stat(realpath)
|
||||
|
||||
if e, ok := err.(*os.PathError); ok && e.Err == os.ErrNotExist {
|
||||
return fs.ErrorDirNotFound
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() {
|
||||
return fs.ErrorDirNotFound
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *Fs) ensureFile(realpath string) (os.FileInfo, error) {
|
||||
info, err := f.client.Stat(realpath)
|
||||
|
||||
if e, ok := err.(*os.PathError); ok && e.Err == os.ErrNotExist {
|
||||
return nil, fs.ErrorObjectNotFound
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return nil, fs.ErrorObjectNotFound
|
||||
}
|
||||
|
||||
return info, nil
|
||||
}
|
||||
|
||||
func (f *Fs) realpath(dir string) string {
|
||||
return f.opt.Enc.FromStandardPath(xPath(f.Root(), dir))
|
||||
}
|
||||
|
||||
// Check the interfaces are satisfied
|
||||
var (
|
||||
_ fs.Fs = (*Fs)(nil)
|
||||
_ fs.Purger = (*Fs)(nil)
|
||||
_ fs.PutStreamer = (*Fs)(nil)
|
||||
_ fs.Abouter = (*Fs)(nil)
|
||||
)
|
58
backend/hdfs/hdfs.go
Normal file
58
backend/hdfs/hdfs.go
Normal file
|
@ -0,0 +1,58 @@
|
|||
// +build !plan9
|
||||
|
||||
package hdfs
|
||||
|
||||
import (
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/config"
|
||||
"github.com/rclone/rclone/lib/encoder"
|
||||
)
|
||||
|
||||
func init() {
|
||||
fsi := &fs.RegInfo{
|
||||
Name: "hdfs",
|
||||
Description: "Hadoop distributed file system",
|
||||
NewFs: NewFs,
|
||||
Options: []fs.Option{{
|
||||
Name: "namenode",
|
||||
Help: "hadoop name node and port",
|
||||
Required: true,
|
||||
Examples: []fs.OptionExample{{
|
||||
Value: "namenode:8020",
|
||||
Help: "Connect to host namenode at port 8020",
|
||||
}},
|
||||
}, {
|
||||
Name: "username",
|
||||
Help: "hadoop user name",
|
||||
Required: false,
|
||||
Examples: []fs.OptionExample{{
|
||||
Value: "root",
|
||||
Help: "Connect to hdfs as root",
|
||||
}},
|
||||
}, {
|
||||
Name: config.ConfigEncoding,
|
||||
Help: config.ConfigEncodingHelp,
|
||||
Advanced: true,
|
||||
Default: (encoder.Display | encoder.EncodeInvalidUtf8 | encoder.EncodeColon),
|
||||
}},
|
||||
}
|
||||
fs.Register(fsi)
|
||||
}
|
||||
|
||||
// Options for this backend
|
||||
type Options struct {
|
||||
Namenode string `config:"namenode"`
|
||||
Username string `config:"username"`
|
||||
Enc encoder.MultiEncoder `config:"encoding"`
|
||||
}
|
||||
|
||||
// xPath make correct file path with leading '/'
|
||||
func xPath(root string, tail string) string {
|
||||
if !strings.HasPrefix(root, "/") {
|
||||
root = "/" + root
|
||||
}
|
||||
return path.Join(root, tail)
|
||||
}
|
20
backend/hdfs/hdfs_test.go
Normal file
20
backend/hdfs/hdfs_test.go
Normal file
|
@ -0,0 +1,20 @@
|
|||
// Test HDFS filesystem interface
|
||||
|
||||
// +build !plan9
|
||||
|
||||
package hdfs_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/rclone/rclone/backend/hdfs"
|
||||
"github.com/rclone/rclone/fstest/fstests"
|
||||
)
|
||||
|
||||
// TestIntegration runs integration tests against the remote
|
||||
func TestIntegration(t *testing.T) {
|
||||
fstests.Run(t, &fstests.Opt{
|
||||
RemoteName: "TestHdfs:",
|
||||
NilObject: (*hdfs.Object)(nil),
|
||||
})
|
||||
}
|
6
backend/hdfs/hdfs_unsupported.go
Normal file
6
backend/hdfs/hdfs_unsupported.go
Normal file
|
@ -0,0 +1,6 @@
|
|||
// Build for hdfs for unsupported platforms to stop go complaining
|
||||
// about "no buildable Go source files "
|
||||
|
||||
// +build plan9
|
||||
|
||||
package hdfs
|
177
backend/hdfs/object.go
Normal file
177
backend/hdfs/object.go
Normal file
|
@ -0,0 +1,177 @@
|
|||
// +build !plan9
|
||||
|
||||
package hdfs
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"path"
|
||||
"time"
|
||||
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/hash"
|
||||
"github.com/rclone/rclone/lib/readers"
|
||||
)
|
||||
|
||||
// Object describes an HDFS file
|
||||
type Object struct {
|
||||
fs *Fs
|
||||
remote string
|
||||
size int64
|
||||
modTime time.Time
|
||||
}
|
||||
|
||||
// Fs returns the parent Fs
|
||||
func (o *Object) Fs() fs.Info {
|
||||
return o.fs
|
||||
}
|
||||
|
||||
// Remote returns the remote path
|
||||
func (o *Object) Remote() string {
|
||||
return o.remote
|
||||
}
|
||||
|
||||
// Size returns the size of an object in bytes
|
||||
func (o *Object) Size() int64 {
|
||||
return o.size
|
||||
}
|
||||
|
||||
// ModTime returns the modification time of the object
|
||||
func (o *Object) ModTime(ctx context.Context) time.Time {
|
||||
return o.modTime
|
||||
}
|
||||
|
||||
// SetModTime sets the modification time of the local fs object
|
||||
func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
|
||||
realpath := o.fs.realpath(o.Remote())
|
||||
err := o.fs.client.Chtimes(realpath, modTime, modTime)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
o.modTime = modTime
|
||||
return nil
|
||||
}
|
||||
|
||||
// Storable returns whether this object is storable
|
||||
func (o *Object) Storable() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// Return a string version
|
||||
func (o *Object) String() string {
|
||||
if o == nil {
|
||||
return "<nil>"
|
||||
}
|
||||
return o.Remote()
|
||||
}
|
||||
|
||||
// Hash is not supported
|
||||
func (o *Object) Hash(ctx context.Context, r hash.Type) (string, error) {
|
||||
return "", hash.ErrUnsupported
|
||||
}
|
||||
|
||||
// Open an object for read
|
||||
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) {
|
||||
realpath := o.realpath()
|
||||
fs.Debugf(o.fs, "open [%s]", realpath)
|
||||
f, err := o.fs.client.Open(realpath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var offset, limit int64 = 0, -1
|
||||
for _, option := range options {
|
||||
switch x := option.(type) {
|
||||
case *fs.SeekOption:
|
||||
offset = x.Offset
|
||||
case *fs.RangeOption:
|
||||
offset, limit = x.Decode(o.Size())
|
||||
}
|
||||
}
|
||||
|
||||
_, err = f.Seek(offset, io.SeekStart)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if limit != -1 {
|
||||
in = readers.NewLimitedReadCloser(f, limit)
|
||||
} else {
|
||||
in = f
|
||||
}
|
||||
|
||||
return in, err
|
||||
}
|
||||
|
||||
// Update object
|
||||
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
|
||||
realpath := o.fs.realpath(src.Remote())
|
||||
dirname := path.Dir(realpath)
|
||||
fs.Debugf(o.fs, "update [%s]", realpath)
|
||||
|
||||
err := o.fs.client.MkdirAll(dirname, 755)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
info, err := o.fs.client.Stat(realpath)
|
||||
if err == nil {
|
||||
err = o.fs.client.Remove(realpath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
out, err := o.fs.client.Create(realpath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cleanup := func() {
|
||||
rerr := o.fs.client.Remove(realpath)
|
||||
if rerr != nil {
|
||||
fs.Errorf(o.fs, "failed to remove [%v]: %v", realpath, rerr)
|
||||
}
|
||||
}
|
||||
|
||||
_, err = io.Copy(out, in)
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return err
|
||||
}
|
||||
|
||||
err = out.Close()
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return err
|
||||
}
|
||||
|
||||
info, err = o.fs.client.Stat(realpath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = o.SetModTime(ctx, src.ModTime(ctx))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
o.size = info.Size()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Remove an object
|
||||
func (o *Object) Remove(ctx context.Context) error {
|
||||
realpath := o.fs.realpath(o.remote)
|
||||
fs.Debugf(o.fs, "remove [%s]", realpath)
|
||||
return o.fs.client.Remove(realpath)
|
||||
}
|
||||
|
||||
func (o *Object) realpath() string {
|
||||
return o.fs.opt.Enc.FromStandardPath(xPath(o.Fs().Root(), o.remote))
|
||||
}
|
||||
|
||||
// Check the interfaces are satisfied
|
||||
var (
|
||||
_ fs.Object = (*Object)(nil)
|
||||
)
|
Loading…
Add table
Add a link
Reference in a new issue