gcs: Fix download of "Content-Encoding: gzip" compressed objects
Before this change, if an object compressed with "Content-Encoding: gzip" was downloaded, a length and hash mismatch would occur, as the Go runtime automatically decompressed the object on download. This change erases the length and hash on compressed objects so they can be downloaded successfully, at the cost of not being able to check the length or the hash of the downloaded object. This also adds the --gcs-download-compressed flag to allow the compressed files to be downloaded as-is, providing compressed objects with intact size and hash information. Fixes #2658
This commit is contained in:
parent
3d55f69338
commit
2781f8e2f1
1 changed files with 48 additions and 10 deletions
|
@ -24,6 +24,7 @@ import (
|
|||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/rclone/rclone/fs"
|
||||
|
@ -304,6 +305,21 @@ rclone does if you know the bucket exists already.
|
|||
`,
|
||||
Default: false,
|
||||
Advanced: true,
|
||||
}, {
|
||||
Name: "download_compressed",
|
||||
Help: `If set this will download compressed objects as-is.
|
||||
|
||||
It is possible to upload objects to GCS with "Content-Encoding: gzip"
|
||||
set. Normally rclone will transparently decompress these files on
|
||||
download. This means that rclone can't check the hash or the size of
|
||||
the file as both of these refer to the compressed object.
|
||||
|
||||
If this flag is set then rclone will download files with
|
||||
"Content-Encoding: gzip" as they are received. This means that rclone
|
||||
can check the size and hash but the file contents will be compressed.
|
||||
`,
|
||||
Advanced: true,
|
||||
Default: false,
|
||||
}, {
|
||||
Name: config.ConfigEncoding,
|
||||
Help: config.ConfigEncodingHelp,
|
||||
|
@ -327,6 +343,7 @@ type Options struct {
|
|||
Location string `config:"location"`
|
||||
StorageClass string `config:"storage_class"`
|
||||
NoCheckBucket bool `config:"no_check_bucket"`
|
||||
DownloadCompressed bool `config:"download_compressed"`
|
||||
Enc encoder.MultiEncoder `config:"encoding"`
|
||||
}
|
||||
|
||||
|
@ -342,6 +359,7 @@ type Fs struct {
|
|||
rootDirectory string // directory part of root (if any)
|
||||
cache *bucket.Cache // cache of bucket status
|
||||
pacer *fs.Pacer // To pace the API calls
|
||||
warnCompressed sync.Once // warn once about compressed files
|
||||
}
|
||||
|
||||
// Object describes a storage object
|
||||
|
@ -355,6 +373,7 @@ type Object struct {
|
|||
bytes int64 // Bytes in the object
|
||||
modTime time.Time // Modified time of the object
|
||||
mimeType string
|
||||
gzipped bool // set if object has Content-Encoding: gzip
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
|
@ -975,6 +994,7 @@ func (o *Object) setMetaData(info *storage.Object) {
|
|||
o.url = info.MediaLink
|
||||
o.bytes = int64(info.Size)
|
||||
o.mimeType = info.ContentType
|
||||
o.gzipped = info.ContentEncoding == "gzip"
|
||||
|
||||
// Read md5sum
|
||||
md5sumData, err := base64.StdEncoding.DecodeString(info.Md5Hash)
|
||||
|
@ -1013,6 +1033,15 @@ func (o *Object) setMetaData(info *storage.Object) {
|
|||
} else {
|
||||
o.modTime = modTime
|
||||
}
|
||||
|
||||
// If gunzipping then size and md5sum are unknown
|
||||
if o.gzipped && !o.fs.opt.DownloadCompressed {
|
||||
o.bytes = -1
|
||||
o.md5sum = ""
|
||||
o.fs.warnCompressed.Do(func() {
|
||||
fs.Logf(o.fs, "Decompressing 'Content-Encoding: gzip' compressed file. Use --gcs-download-compressed to override")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// readObjectInfo reads the definition for an object
|
||||
|
@ -1113,6 +1142,15 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read
|
|||
return nil, err
|
||||
}
|
||||
fs.FixRangeOption(options, o.bytes)
|
||||
if o.gzipped && o.fs.opt.DownloadCompressed {
|
||||
// Allow files which are stored on the cloud storage system
|
||||
// compressed to be downloaded without being decompressed. Note
|
||||
// that setting this here overrides the automatic decompression
|
||||
// in the Transport.
|
||||
//
|
||||
// See: https://cloud.google.com/storage/docs/transcoding
|
||||
req.Header.Set("Accept-Encoding", "gzip")
|
||||
}
|
||||
fs.OpenOptionAddHTTPHeaders(req.Header, options)
|
||||
var res *http.Response
|
||||
err = o.fs.pacer.Call(func() (bool, error) {
|
||||
|
|
Loading…
Reference in a new issue