forked from TrueCloudLab/rclone
pikpak: implement custom hash to replace wrong sha1
This improves PikPak's file integrity verification by implementing a custom hash function named gcid and replacing the previously used SHA-1 hash.
This commit is contained in:
parent
cbccad9491
commit
300851e8bf
3 changed files with 63 additions and 44 deletions
|
@ -176,7 +176,7 @@ type File struct {
|
|||
FileCategory string `json:"file_category,omitempty"` // "AUDIO", "VIDEO"
|
||||
FileExtension string `json:"file_extension,omitempty"`
|
||||
FolderType string `json:"folder_type,omitempty"`
|
||||
Hash string `json:"hash,omitempty"` // sha1 but NOT a valid file hash. looks like a torrent hash
|
||||
Hash string `json:"hash,omitempty"` // custom hash with a form of sha1sum
|
||||
IconLink string `json:"icon_link,omitempty"`
|
||||
ID string `json:"id,omitempty"`
|
||||
Kind string `json:"kind,omitempty"` // "drive#file"
|
||||
|
@ -486,7 +486,7 @@ type RequestNewFile struct {
|
|||
ParentID string `json:"parent_id"`
|
||||
FolderType string `json:"folder_type"`
|
||||
// only when uploading a new file
|
||||
Hash string `json:"hash,omitempty"` // sha1sum
|
||||
Hash string `json:"hash,omitempty"` // gcid
|
||||
Resumable map[string]string `json:"resumable,omitempty"` // {"provider": "PROVIDER_ALIYUN"}
|
||||
Size int64 `json:"size,omitempty"`
|
||||
UploadType string `json:"upload_type,omitempty"` // "UPLOAD_TYPE_FORM" or "UPLOAD_TYPE_RESUMABLE"
|
||||
|
|
|
@ -20,7 +20,7 @@ import (
|
|||
|
||||
// Globals
|
||||
const (
|
||||
cachePrefix = "rclone-pikpak-sha1sum-"
|
||||
cachePrefix = "rclone-pikpak-gcid-"
|
||||
)
|
||||
|
||||
// requestDecompress requests decompress of compressed files
|
||||
|
@ -151,6 +151,9 @@ func (f *Fs) getFile(ctx context.Context, ID string) (info *api.File, err error)
|
|||
}
|
||||
return f.shouldRetry(ctx, resp, err)
|
||||
})
|
||||
if err == nil {
|
||||
info.Name = f.opt.Enc.ToStandardName(info.Name)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -250,16 +253,11 @@ func (f *Fs) requestShare(ctx context.Context, req *api.RequestShare) (info *api
|
|||
return
|
||||
}
|
||||
|
||||
// Read the sha1 of in returning a reader which will read the same contents
|
||||
// Read the gcid of in, returning a reader which will read the same contents
|
||||
//
|
||||
// The cleanup function should be called when out is finished with
|
||||
// regardless of whether this function returned an error or not.
|
||||
func readSHA1(in io.Reader, size, threshold int64) (sha1sum string, out io.Reader, cleanup func(), err error) {
|
||||
// we need an SHA1
|
||||
hash := sha1.New()
|
||||
// use the teeReader to write to the local file AND calculate the SHA1 while doing so
|
||||
teeReader := io.TeeReader(in, hash)
|
||||
|
||||
func readGcid(in io.Reader, size, threshold int64) (gcid string, out io.Reader, cleanup func(), err error) {
|
||||
// nothing to clean up by default
|
||||
cleanup = func() {}
|
||||
|
||||
|
@ -282,8 +280,11 @@ func readSHA1(in io.Reader, size, threshold int64) (sha1sum string, out io.Reade
|
|||
_ = os.Remove(tempFile.Name()) // delete the cache file after we are done - may be deleted already
|
||||
}
|
||||
|
||||
// copy the ENTIRE file to disc and calculate the SHA1 in the process
|
||||
if _, err = io.Copy(tempFile, teeReader); err != nil {
|
||||
// use the teeReader to write to the local file AND calculate the gcid while doing so
|
||||
teeReader := io.TeeReader(in, tempFile)
|
||||
|
||||
// copy the ENTIRE file to disk and calculate the gcid in the process
|
||||
if gcid, err = calcGcid(teeReader, size); err != nil {
|
||||
return
|
||||
}
|
||||
// jump to the start of the local file so we can pass it along
|
||||
|
@ -294,15 +295,38 @@ func readSHA1(in io.Reader, size, threshold int64) (sha1sum string, out io.Reade
|
|||
// replace the already read source with a reader of our cached file
|
||||
out = tempFile
|
||||
} else {
|
||||
// that's a small file, just read it into memory
|
||||
var inData []byte
|
||||
inData, err = io.ReadAll(teeReader)
|
||||
if err != nil {
|
||||
buf := &bytes.Buffer{}
|
||||
teeReader := io.TeeReader(in, buf)
|
||||
|
||||
if gcid, err = calcGcid(teeReader, size); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// set the reader to our read memory block
|
||||
out = bytes.NewReader(inData)
|
||||
out = buf
|
||||
}
|
||||
return hex.EncodeToString(hash.Sum(nil)), out, cleanup, nil
|
||||
return
|
||||
}
|
||||
|
||||
// calcGcid computes the gcid of the data read from r.
//
// The stream is consumed in fixed-size blocks. Each block is hashed with
// SHA-1, the raw block digests are fed in order into a second SHA-1, and
// the hex encoding of that outer digest is returned. The result therefore
// has the same form as a sha1sum but is not the SHA-1 of the content.
//
// size is used only to choose the block length; the amount of data hashed
// is whatever r yields until io.EOF. Any read error other than io.EOF is
// returned together with an empty string, including one that arrives
// alongside a partially read block.
func calcGcid(r io.Reader, size int64) (string, error) {
	// calcBlockSize picks the block length for a stream of j bytes: it
	// starts at 0x40000 and doubles while the stream would span more than
	// 0x200 blocks, stopping once the block length reaches 0x200000.
	// A non-positive j yields the initial length.
	calcBlockSize := func(j int64) int64 {
		var psize int64 = 0x40000
		// psize never exceeds 0x200000 here, so psize*0x200 cannot overflow.
		for j > psize*0x200 && psize < 0x200000 {
			psize <<= 1
		}
		return psize
	}

	totalHash := sha1.New()
	blockHash := sha1.New()
	readSize := calcBlockSize(size)
	for {
		blockHash.Reset()
		n, err := io.CopyN(blockHash, r, readSize)
		if err != nil && err != io.EOF {
			// A genuine read failure must not be hidden just because some
			// bytes of the block were read first; otherwise a gcid of
			// truncated data could be returned as if it were valid.
			return "", err
		}
		if n == 0 {
			// Nothing left to read: the stream ended on a block boundary
			// (or was empty).
			break
		}
		// hash.Hash.Write never returns an error.
		totalHash.Write(blockHash.Sum(nil))
	}
	return hex.EncodeToString(totalHash.Sum(nil)), nil
}
|
||||
|
|
|
@ -7,8 +7,6 @@ package pikpak
|
|||
|
||||
// md5sum is not always available, sometimes given empty.
|
||||
|
||||
// sha1sum used for upload differs from the one with official apps.
|
||||
|
||||
// Trashed files are not restored to the original location when using `batchUntrash`
|
||||
|
||||
// Can't stream without `--vfs-cache-mode=full`
|
||||
|
@ -291,6 +289,7 @@ type Object struct {
|
|||
modTime time.Time // modification time of the object
|
||||
mimeType string // The object MIME type
|
||||
parent string // ID of the parent directories
|
||||
gcid string // custom hash of the object
|
||||
md5sum string // md5sum of the object
|
||||
link *api.Link // link to download the object
|
||||
linkMu *sync.Mutex
|
||||
|
@ -1224,7 +1223,7 @@ func (f *Fs) uploadByResumable(ctx context.Context, in io.Reader, name string, s
|
|||
return
|
||||
}
|
||||
|
||||
func (f *Fs) upload(ctx context.Context, in io.Reader, leaf, dirID, sha1Str string, size int64, options ...fs.OpenOption) (info *api.File, err error) {
|
||||
func (f *Fs) upload(ctx context.Context, in io.Reader, leaf, dirID, gcid string, size int64, options ...fs.OpenOption) (info *api.File, err error) {
|
||||
// determine upload type
|
||||
uploadType := api.UploadTypeResumable
|
||||
// if size >= 0 && size < int64(5*fs.Mebi) {
|
||||
|
@ -1239,7 +1238,7 @@ func (f *Fs) upload(ctx context.Context, in io.Reader, leaf, dirID, sha1Str stri
|
|||
ParentID: parentIDForRequest(dirID),
|
||||
FolderType: "NORMAL",
|
||||
Size: size,
|
||||
Hash: strings.ToUpper(sha1Str),
|
||||
Hash: strings.ToUpper(gcid),
|
||||
UploadType: uploadType,
|
||||
}
|
||||
if uploadType == api.UploadTypeResumable {
|
||||
|
@ -1503,6 +1502,7 @@ func (o *Object) setMetaData(info *api.File) (err error) {
|
|||
} else {
|
||||
o.parent = info.ParentID
|
||||
}
|
||||
o.gcid = info.Hash
|
||||
o.md5sum = info.Md5Checksum
|
||||
if info.Links.ApplicationOctetStream != nil {
|
||||
o.link = info.Links.ApplicationOctetStream
|
||||
|
@ -1576,9 +1576,6 @@ func (o *Object) Hash(ctx context.Context, t hash.Type) (string, error) {
|
|||
if t != hash.MD5 {
|
||||
return "", hash.ErrUnsupported
|
||||
}
|
||||
if o.md5sum == "" {
|
||||
return "", nil
|
||||
}
|
||||
return strings.ToLower(o.md5sum), nil
|
||||
}
|
||||
|
||||
|
@ -1702,25 +1699,23 @@ func (o *Object) upload(ctx context.Context, in io.Reader, src fs.ObjectInfo, wi
|
|||
return err
|
||||
}
|
||||
|
||||
// Calculate sha1sum; grabbed from package jottacloud
|
||||
hashStr, err := src.Hash(ctx, hash.SHA1)
|
||||
if err != nil || hashStr == "" {
|
||||
// unwrap the accounting from the input, we use wrap to put it
|
||||
// back on after the buffering
|
||||
var wrap accounting.WrapFn
|
||||
in, wrap = accounting.UnWrap(in)
|
||||
var cleanup func()
|
||||
hashStr, in, cleanup, err = readSHA1(in, size, int64(o.fs.opt.HashMemoryThreshold))
|
||||
defer cleanup()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to calculate SHA1: %w", err)
|
||||
}
|
||||
// Wrap the accounting back onto the stream
|
||||
in = wrap(in)
|
||||
// Calculate gcid; grabbed from package jottacloud
|
||||
var gcid string
|
||||
// unwrap the accounting from the input, we use wrap to put it
|
||||
// back on after the buffering
|
||||
var wrap accounting.WrapFn
|
||||
in, wrap = accounting.UnWrap(in)
|
||||
var cleanup func()
|
||||
gcid, in, cleanup, err = readGcid(in, size, int64(o.fs.opt.HashMemoryThreshold))
|
||||
defer cleanup()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to calculate gcid: %w", err)
|
||||
}
|
||||
// Wrap the accounting back onto the stream
|
||||
in = wrap(in)
|
||||
|
||||
if !withTemp {
|
||||
info, err := o.fs.upload(ctx, in, leaf, dirID, hashStr, size, options...)
|
||||
info, err := o.fs.upload(ctx, in, leaf, dirID, gcid, size, options...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -1729,7 +1724,7 @@ func (o *Object) upload(ctx context.Context, in io.Reader, src fs.ObjectInfo, wi
|
|||
|
||||
// We have to fall back to upload + rename
|
||||
tempName := "rcloneTemp" + random.String(8)
|
||||
info, err := o.fs.upload(ctx, in, tempName, dirID, hashStr, size, options...)
|
||||
info, err := o.fs.upload(ctx, in, tempName, dirID, gcid, size, options...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue