pikpak: implement custom hash to replace wrong sha1

This improves PikPak's file integrity verification by implementing a custom 
hash function named gcid and replacing the previously used SHA-1 hash.
This commit is contained in:
wiserain 2024-06-20 00:57:21 +09:00 committed by GitHub
parent cbccad9491
commit 300851e8bf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 63 additions and 44 deletions

View file

@ -176,7 +176,7 @@ type File struct {
FileCategory string `json:"file_category,omitempty"` // "AUDIO", "VIDEO" FileCategory string `json:"file_category,omitempty"` // "AUDIO", "VIDEO"
FileExtension string `json:"file_extension,omitempty"` FileExtension string `json:"file_extension,omitempty"`
FolderType string `json:"folder_type,omitempty"` FolderType string `json:"folder_type,omitempty"`
Hash string `json:"hash,omitempty"` // sha1 but NOT a valid file hash. looks like a torrent hash Hash string `json:"hash,omitempty"` // custom hash with a form of sha1sum
IconLink string `json:"icon_link,omitempty"` IconLink string `json:"icon_link,omitempty"`
ID string `json:"id,omitempty"` ID string `json:"id,omitempty"`
Kind string `json:"kind,omitempty"` // "drive#file" Kind string `json:"kind,omitempty"` // "drive#file"
@ -486,7 +486,7 @@ type RequestNewFile struct {
ParentID string `json:"parent_id"` ParentID string `json:"parent_id"`
FolderType string `json:"folder_type"` FolderType string `json:"folder_type"`
// only when uploading a new file // only when uploading a new file
Hash string `json:"hash,omitempty"` // sha1sum Hash string `json:"hash,omitempty"` // gcid
Resumable map[string]string `json:"resumable,omitempty"` // {"provider": "PROVIDER_ALIYUN"} Resumable map[string]string `json:"resumable,omitempty"` // {"provider": "PROVIDER_ALIYUN"}
Size int64 `json:"size,omitempty"` Size int64 `json:"size,omitempty"`
UploadType string `json:"upload_type,omitempty"` // "UPLOAD_TYPE_FORM" or "UPLOAD_TYPE_RESUMABLE" UploadType string `json:"upload_type,omitempty"` // "UPLOAD_TYPE_FORM" or "UPLOAD_TYPE_RESUMABLE"

View file

@ -20,7 +20,7 @@ import (
// Globals // Globals
const ( const (
cachePrefix = "rclone-pikpak-sha1sum-" cachePrefix = "rclone-pikpak-gcid-"
) )
// requestDecompress requests decompress of compressed files // requestDecompress requests decompress of compressed files
@ -151,6 +151,9 @@ func (f *Fs) getFile(ctx context.Context, ID string) (info *api.File, err error)
} }
return f.shouldRetry(ctx, resp, err) return f.shouldRetry(ctx, resp, err)
}) })
if err == nil {
info.Name = f.opt.Enc.ToStandardName(info.Name)
}
return return
} }
@ -250,16 +253,11 @@ func (f *Fs) requestShare(ctx context.Context, req *api.RequestShare) (info *api
return return
} }
// Read the sha1 of in returning a reader which will read the same contents // Read the gcid of in returning a reader which will read the same contents
// //
// The cleanup function should be called when out is finished with // The cleanup function should be called when out is finished with
// regardless of whether this function returned an error or not. // regardless of whether this function returned an error or not.
func readSHA1(in io.Reader, size, threshold int64) (sha1sum string, out io.Reader, cleanup func(), err error) { func readGcid(in io.Reader, size, threshold int64) (gcid string, out io.Reader, cleanup func(), err error) {
// we need an SHA1
hash := sha1.New()
// use the teeReader to write to the local file AND calculate the SHA1 while doing so
teeReader := io.TeeReader(in, hash)
// nothing to clean up by default // nothing to clean up by default
cleanup = func() {} cleanup = func() {}
@ -282,8 +280,11 @@ func readSHA1(in io.Reader, size, threshold int64) (sha1sum string, out io.Reade
_ = os.Remove(tempFile.Name()) // delete the cache file after we are done - may be deleted already _ = os.Remove(tempFile.Name()) // delete the cache file after we are done - may be deleted already
} }
// copy the ENTIRE file to disc and calculate the SHA1 in the process // use the teeReader to write to the local file AND calculate the gcid while doing so
if _, err = io.Copy(tempFile, teeReader); err != nil { teeReader := io.TeeReader(in, tempFile)
// copy the ENTIRE file to disk and calculate the gcid in the process
if gcid, err = calcGcid(teeReader, size); err != nil {
return return
} }
// jump to the start of the local file so we can pass it along // jump to the start of the local file so we can pass it along
@ -294,15 +295,38 @@ func readSHA1(in io.Reader, size, threshold int64) (sha1sum string, out io.Reade
// replace the already read source with a reader of our cached file // replace the already read source with a reader of our cached file
out = tempFile out = tempFile
} else { } else {
// that's a small file, just read it into memory buf := &bytes.Buffer{}
var inData []byte teeReader := io.TeeReader(in, buf)
inData, err = io.ReadAll(teeReader)
if err != nil { if gcid, err = calcGcid(teeReader, size); err != nil {
return return
} }
out = buf
// set the reader to our read memory block
out = bytes.NewReader(inData)
} }
return hex.EncodeToString(hash.Sum(nil)), out, cleanup, nil return
}
// calcGcid computes the gcid of the data read from r.
//
// The stream is consumed in fixed-size blocks. Every block is hashed with
// SHA-1, and the gcid is the hex-encoded SHA-1 of the concatenated block
// digests. The block size is derived from size: it starts at 0x40000 bytes
// and doubles, up to 0x200000 bytes, while the data would span more than
// 0x200 blocks.
//
// r is read until io.EOF. Any other read error is returned as is, even when
// it arrives together with a partially read block, so a gcid is never
// produced from truncated data.
func calcGcid(r io.Reader, size int64) (string, error) {
	const (
		minBlockSize int64 = 0x40000
		maxBlockSize int64 = 0x200000
		maxBlocks    int64 = 0x200
	)

	// Block sizes are powers of two, so this integer comparison selects
	// exactly the same size as comparing size/blockSize in floating point.
	blockSize := minBlockSize
	for size > blockSize*maxBlocks && blockSize < maxBlockSize {
		blockSize <<= 1
	}

	totalHash := sha1.New()
	blockHash := sha1.New()
	for {
		blockHash.Reset()
		n, err := io.CopyN(blockHash, r, blockSize)
		if err != nil && err != io.EOF {
			// A real read failure: do not hash the partial block and carry on.
			return "", err
		}
		if n > 0 {
			// hash.Hash.Write never returns an error.
			totalHash.Write(blockHash.Sum(nil))
		}
		if err == io.EOF {
			// io.CopyN reports io.EOF whenever fewer than blockSize bytes
			// were available, i.e. the final (possibly empty) block.
			break
		}
	}
	return hex.EncodeToString(totalHash.Sum(nil)), nil
}

View file

@ -7,8 +7,6 @@ package pikpak
// md5sum is not always available, sometimes given empty. // md5sum is not always available, sometimes given empty.
// sha1sum used for upload differs from the one with official apps.
// Trashed files are not restored to the original location when using `batchUntrash` // Trashed files are not restored to the original location when using `batchUntrash`
// Can't stream without `--vfs-cache-mode=full` // Can't stream without `--vfs-cache-mode=full`
@ -291,6 +289,7 @@ type Object struct {
modTime time.Time // modification time of the object modTime time.Time // modification time of the object
mimeType string // The object MIME type mimeType string // The object MIME type
parent string // ID of the parent directories parent string // ID of the parent directories
gcid string // custom hash of the object
md5sum string // md5sum of the object md5sum string // md5sum of the object
link *api.Link // link to download the object link *api.Link // link to download the object
linkMu *sync.Mutex linkMu *sync.Mutex
@ -1224,7 +1223,7 @@ func (f *Fs) uploadByResumable(ctx context.Context, in io.Reader, name string, s
return return
} }
func (f *Fs) upload(ctx context.Context, in io.Reader, leaf, dirID, sha1Str string, size int64, options ...fs.OpenOption) (info *api.File, err error) { func (f *Fs) upload(ctx context.Context, in io.Reader, leaf, dirID, gcid string, size int64, options ...fs.OpenOption) (info *api.File, err error) {
// determine upload type // determine upload type
uploadType := api.UploadTypeResumable uploadType := api.UploadTypeResumable
// if size >= 0 && size < int64(5*fs.Mebi) { // if size >= 0 && size < int64(5*fs.Mebi) {
@ -1239,7 +1238,7 @@ func (f *Fs) upload(ctx context.Context, in io.Reader, leaf, dirID, sha1Str stri
ParentID: parentIDForRequest(dirID), ParentID: parentIDForRequest(dirID),
FolderType: "NORMAL", FolderType: "NORMAL",
Size: size, Size: size,
Hash: strings.ToUpper(sha1Str), Hash: strings.ToUpper(gcid),
UploadType: uploadType, UploadType: uploadType,
} }
if uploadType == api.UploadTypeResumable { if uploadType == api.UploadTypeResumable {
@ -1503,6 +1502,7 @@ func (o *Object) setMetaData(info *api.File) (err error) {
} else { } else {
o.parent = info.ParentID o.parent = info.ParentID
} }
o.gcid = info.Hash
o.md5sum = info.Md5Checksum o.md5sum = info.Md5Checksum
if info.Links.ApplicationOctetStream != nil { if info.Links.ApplicationOctetStream != nil {
o.link = info.Links.ApplicationOctetStream o.link = info.Links.ApplicationOctetStream
@ -1576,9 +1576,6 @@ func (o *Object) Hash(ctx context.Context, t hash.Type) (string, error) {
if t != hash.MD5 { if t != hash.MD5 {
return "", hash.ErrUnsupported return "", hash.ErrUnsupported
} }
if o.md5sum == "" {
return "", nil
}
return strings.ToLower(o.md5sum), nil return strings.ToLower(o.md5sum), nil
} }
@ -1702,25 +1699,23 @@ func (o *Object) upload(ctx context.Context, in io.Reader, src fs.ObjectInfo, wi
return err return err
} }
// Calculate sha1sum; grabbed from package jottacloud // Calculate gcid; grabbed from package jottacloud
hashStr, err := src.Hash(ctx, hash.SHA1) var gcid string
if err != nil || hashStr == "" { // unwrap the accounting from the input, we use wrap to put it
// unwrap the accounting from the input, we use wrap to put it // back on after the buffering
// back on after the buffering var wrap accounting.WrapFn
var wrap accounting.WrapFn in, wrap = accounting.UnWrap(in)
in, wrap = accounting.UnWrap(in) var cleanup func()
var cleanup func() gcid, in, cleanup, err = readGcid(in, size, int64(o.fs.opt.HashMemoryThreshold))
hashStr, in, cleanup, err = readSHA1(in, size, int64(o.fs.opt.HashMemoryThreshold)) defer cleanup()
defer cleanup() if err != nil {
if err != nil { return fmt.Errorf("failed to calculate gcid: %w", err)
return fmt.Errorf("failed to calculate SHA1: %w", err)
}
// Wrap the accounting back onto the stream
in = wrap(in)
} }
// Wrap the accounting back onto the stream
in = wrap(in)
if !withTemp { if !withTemp {
info, err := o.fs.upload(ctx, in, leaf, dirID, hashStr, size, options...) info, err := o.fs.upload(ctx, in, leaf, dirID, gcid, size, options...)
if err != nil { if err != nil {
return err return err
} }
@ -1729,7 +1724,7 @@ func (o *Object) upload(ctx context.Context, in io.Reader, src fs.ObjectInfo, wi
// We have to fall back to upload + rename // We have to fall back to upload + rename
tempName := "rcloneTemp" + random.String(8) tempName := "rcloneTemp" + random.String(8)
info, err := o.fs.upload(ctx, in, tempName, dirID, hashStr, size, options...) info, err := o.fs.upload(ctx, in, tempName, dirID, gcid, size, options...)
if err != nil { if err != nil {
return err return err
} }