diff --git a/go.sum b/go.sum index fba59e59f..90ba32db0 100644 --- a/go.sum +++ b/go.sum @@ -501,6 +501,7 @@ golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3 golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190328211700-ab21143f2384 h1:TFlARGu6Czu1z7q93HTxcP1P+/ZFC/IKythI5RzrnRg= golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= diff --git a/vendor/github.com/OneOfOne/xxhash/.gitignore b/vendor/github.com/OneOfOne/xxhash/.gitignore new file mode 100644 index 000000000..f4faa7f8f --- /dev/null +++ b/vendor/github.com/OneOfOne/xxhash/.gitignore @@ -0,0 +1,4 @@ +*.txt +*.pprof +cmap2/ +cache/ diff --git a/vendor/github.com/OneOfOne/xxhash/.travis.yml b/vendor/github.com/OneOfOne/xxhash/.travis.yml new file mode 100644 index 000000000..1c6dc55bc --- /dev/null +++ b/vendor/github.com/OneOfOne/xxhash/.travis.yml @@ -0,0 +1,13 @@ +language: go +sudo: false + +go: + - "1.10" + - "1.11" + - "1.12" + - master + +script: + - go test -tags safe ./... + - go test ./... + - diff --git a/vendor/github.com/OneOfOne/xxhash/LICENSE b/vendor/github.com/OneOfOne/xxhash/LICENSE new file mode 100644 index 000000000..9e30b4f34 --- /dev/null +++ b/vendor/github.com/OneOfOne/xxhash/LICENSE @@ -0,0 +1,187 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. diff --git a/vendor/github.com/OneOfOne/xxhash/README.md b/vendor/github.com/OneOfOne/xxhash/README.md new file mode 100644 index 000000000..23174eb56 --- /dev/null +++ b/vendor/github.com/OneOfOne/xxhash/README.md @@ -0,0 +1,75 @@ +# xxhash [![GoDoc](https://godoc.org/github.com/OneOfOne/xxhash?status.svg)](https://godoc.org/github.com/OneOfOne/xxhash) [![Build Status](https://travis-ci.org/OneOfOne/xxhash.svg?branch=master)](https://travis-ci.org/OneOfOne/xxhash) [![Coverage](https://gocover.io/_badge/github.com/OneOfOne/xxhash)](https://gocover.io/github.com/OneOfOne/xxhash) + +This is a native Go implementation of the excellent [xxhash](https://github.com/Cyan4973/xxHash)* algorithm, an extremely fast non-cryptographic Hash algorithm, working at speeds close to RAM limits. + +* The C implementation is ([Copyright](https://github.com/Cyan4973/xxHash/blob/master/LICENSE) (c) 2012-2014, Yann Collet) + +## Install + + go get github.com/OneOfOne/xxhash + +## Features + +* On Go 1.7+ the pure go version is faster than CGO for all inputs. +* Supports ChecksumString{32,64} xxhash{32,64}.WriteString, which uses no copies when it can, falls back to copy on appengine. +* The native version falls back to a less optimized version on appengine due to the lack of unsafe. 
+* Almost as fast as the mostly pure assembly version written by the brilliant [cespare](https://github.com/cespare/xxhash), while also supporting seeds. +* To manually select the appengine (non-unsafe) version, build with `-tags safe`. + +## Benchmark + +### Core i7-4790 @ 3.60GHz, Linux 4.12.6-1-ARCH (64bit), Go tip (+ff90f4af66 2017-08-19) + +```bash +➤ go test -bench '64' -count 5 -tags cespare | benchstat /dev/stdin +name time/op + +# https://github.com/cespare/xxhash +XXSum64Cespare/Func-8 160ns ± 2% +XXSum64Cespare/Struct-8 173ns ± 1% +XXSum64ShortCespare/Func-8 6.78ns ± 1% +XXSum64ShortCespare/Struct-8 19.6ns ± 2% + +# this package (default mode, using unsafe) +XXSum64/Func-8 170ns ± 1% +XXSum64/Struct-8 182ns ± 1% +XXSum64Short/Func-8 13.5ns ± 3% +XXSum64Short/Struct-8 20.4ns ± 0% + +# this package (appengine, *not* using unsafe) +XXSum64/Func-8 241ns ± 5% +XXSum64/Struct-8 243ns ± 6% +XXSum64Short/Func-8 15.2ns ± 2% +XXSum64Short/Struct-8 23.7ns ± 5% + +CRC64ISO-8 1.23µs ± 1% +CRC64ISOString-8 2.71µs ± 4% +CRC64ISOShort-8 22.2ns ± 3% + +Fnv64-8 2.34µs ± 1% +Fnv64Short-8 74.7ns ± 8% +# +``` + +## Usage + +```go + h := xxhash.New64() + // r, err := os.Open("......") + // defer r.Close() + r := strings.NewReader(F) + io.Copy(h, r) + fmt.Println("xxhash.Backend:", xxhash.Backend) + fmt.Println("File checksum:", h.Sum64()) +``` + +[playground](http://play.golang.org/p/rhRN3RdQyd) + +## TODO + +* Rewrite the 32bit version to be more optimized. +* General cleanup as the Go inliner gets smarter. + +## License + +This project is released under the Apache v2 license. See [LICENSE](LICENSE) for more details. 
diff --git a/vendor/github.com/OneOfOne/xxhash/go.mod b/vendor/github.com/OneOfOne/xxhash/go.mod new file mode 100644 index 000000000..c6da85e0a --- /dev/null +++ b/vendor/github.com/OneOfOne/xxhash/go.mod @@ -0,0 +1,3 @@ +module github.com/OneOfOne/xxhash + +go 1.11 diff --git a/vendor/github.com/OneOfOne/xxhash/xxhash.go b/vendor/github.com/OneOfOne/xxhash/xxhash.go new file mode 100644 index 000000000..2387d6593 --- /dev/null +++ b/vendor/github.com/OneOfOne/xxhash/xxhash.go @@ -0,0 +1,189 @@ +package xxhash + +const ( + prime32x1 uint32 = 2654435761 + prime32x2 uint32 = 2246822519 + prime32x3 uint32 = 3266489917 + prime32x4 uint32 = 668265263 + prime32x5 uint32 = 374761393 + + prime64x1 uint64 = 11400714785074694791 + prime64x2 uint64 = 14029467366897019727 + prime64x3 uint64 = 1609587929392839161 + prime64x4 uint64 = 9650029242287828579 + prime64x5 uint64 = 2870177450012600261 + + maxInt32 int32 = (1<<31 - 1) + + // precomputed zero Vs for seed 0 + zero64x1 = 0x60ea27eeadc0b5d6 + zero64x2 = 0xc2b2ae3d27d4eb4f + zero64x3 = 0x0 + zero64x4 = 0x61c8864e7a143579 +) + +// Checksum32 returns the checksum of the input data with the seed set to 0. +func Checksum32(in []byte) uint32 { + return Checksum32S(in, 0) +} + +// ChecksumString32 returns the checksum of the input data, without creating a copy, with the seed set to 0. +func ChecksumString32(s string) uint32 { + return ChecksumString32S(s, 0) +} + +type XXHash32 struct { + mem [16]byte + ln, memIdx int32 + v1, v2, v3, v4 uint32 + seed uint32 +} + +// Size returns the number of bytes Sum will return. +func (xx *XXHash32) Size() int { + return 4 +} + +// BlockSize returns the hash's underlying block size. +// The Write method must be able to accept any amount +// of data, but it may operate more efficiently if all writes +// are a multiple of the block size. 
+func (xx *XXHash32) BlockSize() int { + return 16 +} + +// NewS32 creates a new *XXHash32 computing the 32bit xxHash checksum starting with the given seed. +func NewS32(seed uint32) (xx *XXHash32) { + xx = &XXHash32{ + seed: seed, + } + xx.Reset() + return +} + +// New32 creates a new *XXHash32 computing the 32bit xxHash checksum starting with the seed set to 0. +func New32() *XXHash32 { + return NewS32(0) +} + +func (xx *XXHash32) Reset() { + xx.v1 = xx.seed + prime32x1 + prime32x2 + xx.v2 = xx.seed + prime32x2 + xx.v3 = xx.seed + xx.v4 = xx.seed - prime32x1 + xx.ln, xx.memIdx = 0, 0 +} + +// Sum appends the current 32bit hash to in, most significant byte first, and returns the resulting slice. +// It does not change the underlying hash state. +func (xx *XXHash32) Sum(in []byte) []byte { + s := xx.Sum32() + return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s)) +} + +// Checksum64 returns the 64bit checksum of in with the seed set to 0; it is an alias for Checksum64S(in, 0). +func Checksum64(in []byte) uint64 { + return Checksum64S(in, 0) +} + +// ChecksumString64 returns the 64bit checksum of s with the seed set to 0 (the unsafe backend avoids copying s, the safe backend converts it to a []byte). +func ChecksumString64(s string) uint64 { + return ChecksumString64S(s, 0) +} + +type XXHash64 struct { + v1, v2, v3, v4 uint64 + seed uint64 + ln uint64 + mem [32]byte + memIdx int8 +} + +// Size returns the number of bytes Sum will return. +func (xx *XXHash64) Size() int { + return 8 +} + +// BlockSize returns the hash's underlying block size. +// The Write method must be able to accept any amount +// of data, but it may operate more efficiently if all writes +// are a multiple of the block size. +func (xx *XXHash64) BlockSize() int { + return 32 +} + +// NewS64 creates a new *XXHash64 computing the 64bit xxHash checksum starting with the given seed. +func NewS64(seed uint64) (xx *XXHash64) { + xx = &XXHash64{ + seed: seed, + } + xx.Reset() + return +} + +// New64 creates a new *XXHash64 computing the 64bit xxHash checksum starting with the seed set to 0. 
+func New64() *XXHash64 { + return NewS64(0) +} + +func (xx *XXHash64) Reset() { + xx.ln, xx.memIdx = 0, 0 + xx.v1, xx.v2, xx.v3, xx.v4 = resetVs64(xx.seed) +} + +// Sum appends the current hash to b and returns the resulting slice. +// It does not change the underlying hash state. +func (xx *XXHash64) Sum(in []byte) []byte { + s := xx.Sum64() + return append(in, byte(s>>56), byte(s>>48), byte(s>>40), byte(s>>32), byte(s>>24), byte(s>>16), byte(s>>8), byte(s)) +} + +// force the compiler to use ROTL instructions + +func rotl32_1(x uint32) uint32 { return (x << 1) | (x >> (32 - 1)) } +func rotl32_7(x uint32) uint32 { return (x << 7) | (x >> (32 - 7)) } +func rotl32_11(x uint32) uint32 { return (x << 11) | (x >> (32 - 11)) } +func rotl32_12(x uint32) uint32 { return (x << 12) | (x >> (32 - 12)) } +func rotl32_13(x uint32) uint32 { return (x << 13) | (x >> (32 - 13)) } +func rotl32_17(x uint32) uint32 { return (x << 17) | (x >> (32 - 17)) } +func rotl32_18(x uint32) uint32 { return (x << 18) | (x >> (32 - 18)) } + +func rotl64_1(x uint64) uint64 { return (x << 1) | (x >> (64 - 1)) } +func rotl64_7(x uint64) uint64 { return (x << 7) | (x >> (64 - 7)) } +func rotl64_11(x uint64) uint64 { return (x << 11) | (x >> (64 - 11)) } +func rotl64_12(x uint64) uint64 { return (x << 12) | (x >> (64 - 12)) } +func rotl64_18(x uint64) uint64 { return (x << 18) | (x >> (64 - 18)) } +func rotl64_23(x uint64) uint64 { return (x << 23) | (x >> (64 - 23)) } +func rotl64_27(x uint64) uint64 { return (x << 27) | (x >> (64 - 27)) } +func rotl64_31(x uint64) uint64 { return (x << 31) | (x >> (64 - 31)) } + +func mix64(h uint64) uint64 { + h ^= h >> 33 + h *= prime64x2 + h ^= h >> 29 + h *= prime64x3 + h ^= h >> 32 + return h +} + +func resetVs64(seed uint64) (v1, v2, v3, v4 uint64) { + if seed == 0 { + return zero64x1, zero64x2, zero64x3, zero64x4 + } + return (seed + prime64x1 + prime64x2), (seed + prime64x2), (seed), (seed - prime64x1) +} + +// borrowed from cespare +func round64(h, v 
uint64) uint64 { + h += v * prime64x2 + h = rotl64_31(h) + h *= prime64x1 + return h +} + +func mergeRound64(h, v uint64) uint64 { + v = round64(0, v) + h ^= v + h = h*prime64x1 + prime64x4 + return h +} diff --git a/vendor/github.com/OneOfOne/xxhash/xxhash_go17.go b/vendor/github.com/OneOfOne/xxhash/xxhash_go17.go new file mode 100644 index 000000000..ae48e0c5c --- /dev/null +++ b/vendor/github.com/OneOfOne/xxhash/xxhash_go17.go @@ -0,0 +1,161 @@ +package xxhash + +func u32(in []byte) uint32 { + return uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 +} + +func u64(in []byte) uint64 { + return uint64(in[0]) | uint64(in[1])<<8 | uint64(in[2])<<16 | uint64(in[3])<<24 | uint64(in[4])<<32 | uint64(in[5])<<40 | uint64(in[6])<<48 | uint64(in[7])<<56 +} + +// Checksum32S returns the checksum of the input bytes with the specific seed. +func Checksum32S(in []byte, seed uint32) (h uint32) { + var i int + + if len(in) > 15 { + var ( + v1 = seed + prime32x1 + prime32x2 + v2 = seed + prime32x2 + v3 = seed + 0 + v4 = seed - prime32x1 + ) + for ; i < len(in)-15; i += 16 { + in := in[i : i+16 : len(in)] + v1 += u32(in[0:4:len(in)]) * prime32x2 + v1 = rotl32_13(v1) * prime32x1 + + v2 += u32(in[4:8:len(in)]) * prime32x2 + v2 = rotl32_13(v2) * prime32x1 + + v3 += u32(in[8:12:len(in)]) * prime32x2 + v3 = rotl32_13(v3) * prime32x1 + + v4 += u32(in[12:16:len(in)]) * prime32x2 + v4 = rotl32_13(v4) * prime32x1 + } + + h = rotl32_1(v1) + rotl32_7(v2) + rotl32_12(v3) + rotl32_18(v4) + + } else { + h = seed + prime32x5 + } + + h += uint32(len(in)) + for ; i <= len(in)-4; i += 4 { + in := in[i : i+4 : len(in)] + h += u32(in[0:4:len(in)]) * prime32x3 + h = rotl32_17(h) * prime32x4 + } + + for ; i < len(in); i++ { + h += uint32(in[i]) * prime32x5 + h = rotl32_11(h) * prime32x1 + } + + h ^= h >> 15 + h *= prime32x2 + h ^= h >> 13 + h *= prime32x3 + h ^= h >> 16 + + return +} + +func (xx *XXHash32) Write(in []byte) (n int, err error) { + i, ml := 0, int(xx.memIdx) + n = 
len(in) + xx.ln += int32(n) + + if d := 16 - ml; ml > 0 && ml+len(in) > 16 { + xx.memIdx += int32(copy(xx.mem[xx.memIdx:], in[:d])) + ml, in = 16, in[d:len(in):len(in)] + } else if ml+len(in) < 16 { + xx.memIdx += int32(copy(xx.mem[xx.memIdx:], in)) + return + } + + if ml > 0 { + i += 16 - ml + xx.memIdx += int32(copy(xx.mem[xx.memIdx:len(xx.mem):len(xx.mem)], in)) + in := xx.mem[:16:len(xx.mem)] + + xx.v1 += u32(in[0:4:len(in)]) * prime32x2 + xx.v1 = rotl32_13(xx.v1) * prime32x1 + + xx.v2 += u32(in[4:8:len(in)]) * prime32x2 + xx.v2 = rotl32_13(xx.v2) * prime32x1 + + xx.v3 += u32(in[8:12:len(in)]) * prime32x2 + xx.v3 = rotl32_13(xx.v3) * prime32x1 + + xx.v4 += u32(in[12:16:len(in)]) * prime32x2 + xx.v4 = rotl32_13(xx.v4) * prime32x1 + + xx.memIdx = 0 + } + + for ; i <= len(in)-16; i += 16 { + in := in[i : i+16 : len(in)] + xx.v1 += u32(in[0:4:len(in)]) * prime32x2 + xx.v1 = rotl32_13(xx.v1) * prime32x1 + + xx.v2 += u32(in[4:8:len(in)]) * prime32x2 + xx.v2 = rotl32_13(xx.v2) * prime32x1 + + xx.v3 += u32(in[8:12:len(in)]) * prime32x2 + xx.v3 = rotl32_13(xx.v3) * prime32x1 + + xx.v4 += u32(in[12:16:len(in)]) * prime32x2 + xx.v4 = rotl32_13(xx.v4) * prime32x1 + } + + if len(in)-i != 0 { + xx.memIdx += int32(copy(xx.mem[xx.memIdx:], in[i:len(in):len(in)])) + } + + return +} + +func (xx *XXHash32) Sum32() (h uint32) { + var i int32 + if xx.ln > 15 { + h = rotl32_1(xx.v1) + rotl32_7(xx.v2) + rotl32_12(xx.v3) + rotl32_18(xx.v4) + } else { + h = xx.seed + prime32x5 + } + + h += uint32(xx.ln) + + if xx.memIdx > 0 { + for ; i < xx.memIdx-3; i += 4 { + in := xx.mem[i : i+4 : len(xx.mem)] + h += u32(in[0:4:len(in)]) * prime32x3 + h = rotl32_17(h) * prime32x4 + } + + for ; i < xx.memIdx; i++ { + h += uint32(xx.mem[i]) * prime32x5 + h = rotl32_11(h) * prime32x1 + } + } + h ^= h >> 15 + h *= prime32x2 + h ^= h >> 13 + h *= prime32x3 + h ^= h >> 16 + + return +} + +// Checksum64S returns the 64bit xxhash checksum for a single input +func Checksum64S(in []byte, seed uint64) uint64 { 
+ if len(in) == 0 && seed == 0 { + return 0xef46db3751d8e999 + } + + if len(in) > 31 { + return checksum64(in, seed) + } + + return checksum64Short(in, seed) +} diff --git a/vendor/github.com/OneOfOne/xxhash/xxhash_safe.go b/vendor/github.com/OneOfOne/xxhash/xxhash_safe.go new file mode 100644 index 000000000..e92ec29e0 --- /dev/null +++ b/vendor/github.com/OneOfOne/xxhash/xxhash_safe.go @@ -0,0 +1,183 @@ +// +build appengine safe ppc64le ppc64be mipsle mips s390x + +package xxhash + +// Backend returns the current version of xxhash being used. +const Backend = "GoSafe" + +func ChecksumString32S(s string, seed uint32) uint32 { + return Checksum32S([]byte(s), seed) +} + +func (xx *XXHash32) WriteString(s string) (int, error) { + if len(s) == 0 { + return 0, nil + } + return xx.Write([]byte(s)) +} + +func ChecksumString64S(s string, seed uint64) uint64 { + return Checksum64S([]byte(s), seed) +} + +func (xx *XXHash64) WriteString(s string) (int, error) { + if len(s) == 0 { + return 0, nil + } + return xx.Write([]byte(s)) +} + +func checksum64(in []byte, seed uint64) (h uint64) { + var ( + v1, v2, v3, v4 = resetVs64(seed) + + i int + ) + + for ; i < len(in)-31; i += 32 { + in := in[i : i+32 : len(in)] + v1 = round64(v1, u64(in[0:8:len(in)])) + v2 = round64(v2, u64(in[8:16:len(in)])) + v3 = round64(v3, u64(in[16:24:len(in)])) + v4 = round64(v4, u64(in[24:32:len(in)])) + } + + h = rotl64_1(v1) + rotl64_7(v2) + rotl64_12(v3) + rotl64_18(v4) + + h = mergeRound64(h, v1) + h = mergeRound64(h, v2) + h = mergeRound64(h, v3) + h = mergeRound64(h, v4) + + h += uint64(len(in)) + + for ; i < len(in)-7; i += 8 { + h ^= round64(0, u64(in[i:len(in):len(in)])) + h = rotl64_27(h)*prime64x1 + prime64x4 + } + + for ; i < len(in)-3; i += 4 { + h ^= uint64(u32(in[i:len(in):len(in)])) * prime64x1 + h = rotl64_23(h)*prime64x2 + prime64x3 + } + + for ; i < len(in); i++ { + h ^= uint64(in[i]) * prime64x5 + h = rotl64_11(h) * prime64x1 + } + + return mix64(h) +} + +func checksum64Short(in 
[]byte, seed uint64) uint64 { + var ( + h = seed + prime64x5 + uint64(len(in)) + i int + ) + + for ; i < len(in)-7; i += 8 { + k := u64(in[i : i+8 : len(in)]) + h ^= round64(0, k) + h = rotl64_27(h)*prime64x1 + prime64x4 + } + + for ; i < len(in)-3; i += 4 { + h ^= uint64(u32(in[i:i+4:len(in)])) * prime64x1 + h = rotl64_23(h)*prime64x2 + prime64x3 + } + + for ; i < len(in); i++ { + h ^= uint64(in[i]) * prime64x5 + h = rotl64_11(h) * prime64x1 + } + + return mix64(h) +} + +func (xx *XXHash64) Write(in []byte) (n int, err error) { + var ( + ml = int(xx.memIdx) + d = 32 - ml + ) + + n = len(in) + xx.ln += uint64(n) + + if ml+len(in) < 32 { + xx.memIdx += int8(copy(xx.mem[xx.memIdx:len(xx.mem):len(xx.mem)], in)) + return + } + + i, v1, v2, v3, v4 := 0, xx.v1, xx.v2, xx.v3, xx.v4 + if ml > 0 && ml+len(in) >= 32 { // >= 32: a buffer filled exactly must be mixed in as a full stripe, not left for Sum64 to treat as tail bytes + xx.memIdx += int8(copy(xx.mem[xx.memIdx:len(xx.mem):len(xx.mem)], in[:d:len(in)])) + in = in[d:len(in):len(in)] + + in := xx.mem[0:32:len(xx.mem)] + + v1 = round64(v1, u64(in[0:8:len(in)])) + v2 = round64(v2, u64(in[8:16:len(in)])) + v3 = round64(v3, u64(in[16:24:len(in)])) + v4 = round64(v4, u64(in[24:32:len(in)])) + + xx.memIdx = 0 + } + + for ; i < len(in)-31; i += 32 { + in := in[i : i+32 : len(in)] + v1 = round64(v1, u64(in[0:8:len(in)])) + v2 = round64(v2, u64(in[8:16:len(in)])) + v3 = round64(v3, u64(in[16:24:len(in)])) + v4 = round64(v4, u64(in[24:32:len(in)])) + } + + if len(in)-i != 0 { + xx.memIdx += int8(copy(xx.mem[xx.memIdx:], in[i:len(in):len(in)])) + } + + xx.v1, xx.v2, xx.v3, xx.v4 = v1, v2, v3, v4 + + return +} + +func (xx *XXHash64) Sum64() (h uint64) { + var i int + if xx.ln > 31 { + v1, v2, v3, v4 := xx.v1, xx.v2, xx.v3, xx.v4 + h = rotl64_1(v1) + rotl64_7(v2) + rotl64_12(v3) + rotl64_18(v4) + + h = mergeRound64(h, v1) + h = mergeRound64(h, v2) + h = mergeRound64(h, v3) + h = mergeRound64(h, v4) + } else { + h = xx.seed + prime64x5 + } + + h += uint64(xx.ln) + if xx.memIdx > 0 { + in := xx.mem[:xx.memIdx] + for ; i < int(xx.memIdx)-7; i += 
8 { + in := in[i : i+8 : len(in)] + k := u64(in[0:8:len(in)]) + k *= prime64x2 + k = rotl64_31(k) + k *= prime64x1 + h ^= k + h = rotl64_27(h)*prime64x1 + prime64x4 + } + + for ; i < int(xx.memIdx)-3; i += 4 { + in := in[i : i+4 : len(in)] + h ^= uint64(u32(in[0:4:len(in)])) * prime64x1 + h = rotl64_23(h)*prime64x2 + prime64x3 + } + + for ; i < int(xx.memIdx); i++ { + h ^= uint64(in[i]) * prime64x5 + h = rotl64_11(h) * prime64x1 + } + } + + return mix64(h) +} diff --git a/vendor/github.com/OneOfOne/xxhash/xxhash_unsafe.go b/vendor/github.com/OneOfOne/xxhash/xxhash_unsafe.go new file mode 100644 index 000000000..10f2e8429 --- /dev/null +++ b/vendor/github.com/OneOfOne/xxhash/xxhash_unsafe.go @@ -0,0 +1,239 @@ +// +build !safe +// +build !appengine +// +build !ppc64le +// +build !mipsle +// +build !ppc64be +// +build !mips +// +build !s390x + +package xxhash + +import ( + "reflect" + "unsafe" +) + +// Backend returns the current version of xxhash being used. +const Backend = "GoUnsafe" + +// ChecksumString32S returns the checksum of the input data, without creating a copy, with the specific seed. +func ChecksumString32S(s string, seed uint32) uint32 { + if len(s) == 0 { + return Checksum32S(nil, seed) + } + ss := (*reflect.StringHeader)(unsafe.Pointer(&s)) + return Checksum32S((*[maxInt32]byte)(unsafe.Pointer(ss.Data))[:len(s):len(s)], seed) +} + +func (xx *XXHash32) WriteString(s string) (int, error) { + if len(s) == 0 { + return 0, nil + } + + ss := (*reflect.StringHeader)(unsafe.Pointer(&s)) + return xx.Write((*[maxInt32]byte)(unsafe.Pointer(ss.Data))[:len(s):len(s)]) +} + +// ChecksumString64S returns the checksum of the input data, without creating a copy, with the specific seed. 
+func ChecksumString64S(s string, seed uint64) uint64 { + if len(s) == 0 { + return Checksum64S(nil, seed) + } + + ss := (*reflect.StringHeader)(unsafe.Pointer(&s)) + return Checksum64S((*[maxInt32]byte)(unsafe.Pointer(ss.Data))[:len(s):len(s)], seed) +} + +func (xx *XXHash64) WriteString(s string) (int, error) { + if len(s) == 0 { + return 0, nil + } + ss := (*reflect.StringHeader)(unsafe.Pointer(&s)) + return xx.Write((*[maxInt32]byte)(unsafe.Pointer(ss.Data))[:len(s)]) +} + +func checksum64(in []byte, seed uint64) uint64 { + var ( + wordsLen = len(in) >> 3 + words = ((*[maxInt32 / 8]uint64)(unsafe.Pointer(&in[0])))[:wordsLen:wordsLen] + + h uint64 = prime64x5 + + v1, v2, v3, v4 = resetVs64(seed) + + i int + ) + + for ; i < len(words)-3; i += 4 { + words := (*[4]uint64)(unsafe.Pointer(&words[i])) + + v1 = round64(v1, words[0]) + v2 = round64(v2, words[1]) + v3 = round64(v3, words[2]) + v4 = round64(v4, words[3]) + } + + h = rotl64_1(v1) + rotl64_7(v2) + rotl64_12(v3) + rotl64_18(v4) + + h = mergeRound64(h, v1) + h = mergeRound64(h, v2) + h = mergeRound64(h, v3) + h = mergeRound64(h, v4) + + h += uint64(len(in)) + + for _, k := range words[i:] { + h ^= round64(0, k) + h = rotl64_27(h)*prime64x1 + prime64x4 + } + + if in = in[wordsLen<<3 : len(in) : len(in)]; len(in) > 3 { + words := (*[1]uint32)(unsafe.Pointer(&in[0])) + h ^= uint64(words[0]) * prime64x1 + h = rotl64_23(h)*prime64x2 + prime64x3 + + in = in[4:len(in):len(in)] + } + + for _, b := range in { + h ^= uint64(b) * prime64x5 + h = rotl64_11(h) * prime64x1 + } + + return mix64(h) +} + +func checksum64Short(in []byte, seed uint64) uint64 { + var ( + h = seed + prime64x5 + uint64(len(in)) + i int + ) + + if len(in) > 7 { + var ( + wordsLen = len(in) >> 3 + words = ((*[maxInt32 / 8]uint64)(unsafe.Pointer(&in[0])))[:wordsLen:wordsLen] + ) + + for i := range words { + h ^= round64(0, words[i]) + h = rotl64_27(h)*prime64x1 + prime64x4 + } + + i = wordsLen << 3 + } + + if in = in[i:len(in):len(in)]; len(in) > 3 { 
+ words := (*[1]uint32)(unsafe.Pointer(&in[0])) + h ^= uint64(words[0]) * prime64x1 + h = rotl64_23(h)*prime64x2 + prime64x3 + + in = in[4:len(in):len(in)] + } + + for _, b := range in { + h ^= uint64(b) * prime64x5 + h = rotl64_11(h) * prime64x1 + } + + return mix64(h) +} + +func (xx *XXHash64) Write(in []byte) (n int, err error) { + mem, idx := xx.mem[:], int(xx.memIdx) + + xx.ln, n = xx.ln+uint64(len(in)), len(in) + + if idx+len(in) < 32 { + xx.memIdx += int8(copy(mem[idx:len(mem):len(mem)], in)) + return + } + + var ( + v1, v2, v3, v4 = xx.v1, xx.v2, xx.v3, xx.v4 + + i int + ) + + if d := 32 - int(idx); d > 0 && int(idx)+len(in) > 31 { + copy(mem[idx:len(mem):len(mem)], in[:len(in):len(in)]) + + words := (*[4]uint64)(unsafe.Pointer(&mem[0])) + + v1 = round64(v1, words[0]) + v2 = round64(v2, words[1]) + v3 = round64(v3, words[2]) + v4 = round64(v4, words[3]) + + if in, xx.memIdx = in[d:len(in):len(in)], 0; len(in) == 0 { + goto RET + } + } + + for ; i < len(in)-31; i += 32 { + words := (*[4]uint64)(unsafe.Pointer(&in[i])) + + v1 = round64(v1, words[0]) + v2 = round64(v2, words[1]) + v3 = round64(v3, words[2]) + v4 = round64(v4, words[3]) + } + + if len(in)-i != 0 { + xx.memIdx += int8(copy(mem[xx.memIdx:len(mem):len(mem)], in[i:len(in):len(in)])) + } + +RET: + xx.v1, xx.v2, xx.v3, xx.v4 = v1, v2, v3, v4 + + return +} + +func (xx *XXHash64) Sum64() (h uint64) { + if seed := xx.seed; xx.ln > 31 { + v1, v2, v3, v4 := xx.v1, xx.v2, xx.v3, xx.v4 + h = rotl64_1(v1) + rotl64_7(v2) + rotl64_12(v3) + rotl64_18(v4) + + h = mergeRound64(h, v1) + h = mergeRound64(h, v2) + h = mergeRound64(h, v3) + h = mergeRound64(h, v4) + } else if seed == 0 { + h = prime64x5 + } else { + h = seed + prime64x5 + } + + h += uint64(xx.ln) + + if xx.memIdx == 0 { + return mix64(h) + } + + var ( + in = xx.mem[:xx.memIdx:xx.memIdx] + wordsLen = len(in) >> 3 + words = ((*[maxInt32 / 8]uint64)(unsafe.Pointer(&in[0])))[:wordsLen:wordsLen] + ) + + for _, k := range words { + h ^= round64(0, k) + h = 
rotl64_27(h)*prime64x1 + prime64x4 + } + + if in = in[wordsLen<<3 : len(in) : len(in)]; len(in) > 3 { + words := (*[1]uint32)(unsafe.Pointer(&in[0])) + + h ^= uint64(words[0]) * prime64x1 + h = rotl64_23(h)*prime64x2 + prime64x3 + + in = in[4:len(in):len(in)] + } + + for _, b := range in { + h ^= uint64(b) * prime64x5 + h = rotl64_11(h) * prime64x1 + } + + return mix64(h) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/CODE_OF_CONDUCT.md b/vendor/github.com/gabriel-vasile/mimetype/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..8479cd87d --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at vasile.gabriel@email.com. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/vendor/github.com/gabriel-vasile/mimetype/CONTRIBUTING.md b/vendor/github.com/gabriel-vasile/mimetype/CONTRIBUTING.md new file mode 100644 index 000000000..56ae4e57c --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/CONTRIBUTING.md @@ -0,0 +1,12 @@ +## Contribute +Contributions to **mimetype** are welcome. If you find an issue and you consider +contributing, you can use the [Github issues tracker](https://github.com/gabriel-vasile/mimetype/issues) +in order to report it, or better yet, open a pull request. + +Code contributions must respect these rules: + - code must be test covered + - code must be formatted using gofmt tool + - exported names must be documented + +**Important**: By submitting a pull request, you agree to allow the project +owner to license your work under the same license as that used by the project. 
diff --git a/vendor/github.com/gabriel-vasile/mimetype/LICENSE b/vendor/github.com/gabriel-vasile/mimetype/LICENSE new file mode 100644 index 000000000..f1b456e91 --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018, 2019 Gabriel Vasile + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/gabriel-vasile/mimetype/README.md b/vendor/github.com/gabriel-vasile/mimetype/README.md new file mode 100644 index 000000000..3d7f4afc5 --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/README.md @@ -0,0 +1,64 @@ +

+ mimetype +

+ +

+ A package for detecting MIME types and extensions based on magic numbers +

+
+ No bindings, all written in pure Go
+ +

+ + Build Status + + + Documentation + + + Go report card + + + Go report card + + + License + +

+ +## Install +```bash +go get github.com/gabriel-vasile/mimetype +``` + +## Use +The library exposes three functions you can use in order to detect a file type. +See [Godoc](https://godoc.org/github.com/gabriel-vasile/mimetype) for full reference. +```go +func Detect(in []byte) (mime, extension string) {...} +func DetectReader(r io.Reader) (mime, extension string, err error) {...} +func DetectFile(file string) (mime, extension string, err error) {...} +``` +When detecting from a `ReadSeeker` interface, such as `os.File`, make sure +to reset the offset of the reader to the beginning if needed: +```go +_, err = file.Seek(0, io.SeekStart) +``` + +## Supported MIME types +See [supported mimes](supported_mimes.md) for the list of detected MIME types. +If support is needed for a specific file format, please open an [issue](https://github.com/gabriel-vasile/mimetype/issues/new/choose). + +## Structure +**mimetype** uses a hierarchical structure to keep the matching functions. +This reduces the number of calls needed for detecting the file type. The reason +behind this choice is that there are file formats used as containers for other +file formats. For example, Microsoft Office files are just zip archives, +containing specific metadata files. +
+ structure +
+ +## Contributing +See [CONTRIBUTING.md](CONTRIBUTING.md). diff --git a/vendor/github.com/gabriel-vasile/mimetype/go.mod b/vendor/github.com/gabriel-vasile/mimetype/go.mod new file mode 100644 index 000000000..6f8542d53 --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/go.mod @@ -0,0 +1 @@ +module github.com/gabriel-vasile/mimetype diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/json/json.go b/vendor/github.com/gabriel-vasile/mimetype/internal/json/json.go new file mode 100644 index 000000000..9aef6cb4c --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/json/json.go @@ -0,0 +1,536 @@ +// Copyright (c) 2009 The Go Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// JSON value parser state machine. +// This package is almost entirely copied from the Go stdlib. +// Changes made to it permit users of the package to tell +// if some slice of bytes is a valid beginning of a json string. +package json + +import "fmt" + +type ( + context int + scanStatus int +) + +const ( + contextKey context = iota + contextObj + contextArr + + scanContinue scanStatus = iota // uninteresting byte + scanBeginLiteral // end implied by next result != scanContinue + scanBeginObject // begin object + scanObjectKey // just finished object key (string) + scanObjectValue // just finished non-last object value + scanEndObject // end object (implies scanObjectValue if possible) + scanBeginArray // begin array + scanArrayValue // just finished array value + scanEndArray // end array (implies scanArrayValue if possible) + scanSkipSpace // space byte; can skip; known to be last "continue" result + scanEnd // top-level value ended *before* this byte; known to be first "stop" result + scanError // hit an error, scanner.err. 
+) + +type ( + scanner struct { + step func(*scanner, byte) scanStatus + contexts []context + endTop bool + err error + index int + } +) + +// Scan returns the number of bytes scanned and if there was any error +// in trying to reach the end of data +func Scan(data []byte) (int, error) { + s := &scanner{} + _ = checkValid(data, s) + return s.index, s.err +} + +// checkValid verifies that data is valid JSON-encoded data. +// scan is passed in for use by checkValid to avoid an allocation. +func checkValid(data []byte, scan *scanner) error { + scan.reset() + for _, c := range data { + scan.index++ + if scan.step(scan, c) == scanError { + return scan.err + } + } + if scan.eof() == scanError { + return scan.err + } + return nil +} + +func isSpace(c byte) bool { + return c == ' ' || c == '\t' || c == '\r' || c == '\n' +} + +func (s *scanner) reset() { + s.step = stateBeginValue + s.contexts = s.contexts[0:0] + s.err = nil +} + +// eof tells the scanner that the end of input has been reached. +// It returns a scan status just as s.step does. +func (s *scanner) eof() scanStatus { + if s.err != nil { + return scanError + } + if s.endTop { + return scanEnd + } + s.step(s, ' ') + if s.endTop { + return scanEnd + } + if s.err == nil { + s.err = fmt.Errorf("unexpected end of JSON input") + } + return scanError +} + +// pushContext pushes a new parse state p onto the parse stack. +func (s *scanner) pushParseState(p context) { + s.contexts = append(s.contexts, p) +} + +// popParseState pops a parse state (already obtained) off the stack +// and updates s.step accordingly. +func (s *scanner) popParseState() { + n := len(s.contexts) - 1 + s.contexts = s.contexts[0:n] + if n == 0 { + s.step = stateEndTop + s.endTop = true + } else { + s.step = stateEndValue + } +} + +// stateBeginValueOrEmpty is the state after reading `[`. 
+func stateBeginValueOrEmpty(s *scanner, c byte) scanStatus { + if c <= ' ' && isSpace(c) { + return scanSkipSpace + } + if c == ']' { + return stateEndValue(s, c) + } + return stateBeginValue(s, c) +} + +// stateBeginValue is the state at the beginning of the input. +func stateBeginValue(s *scanner, c byte) scanStatus { + if c <= ' ' && isSpace(c) { + return scanSkipSpace + } + switch c { + case '{': + s.step = stateBeginStringOrEmpty + s.pushParseState(contextKey) + return scanBeginObject + case '[': + s.step = stateBeginValueOrEmpty + s.pushParseState(contextArr) + return scanBeginArray + case '"': + s.step = stateInString + return scanBeginLiteral + case '-': + s.step = stateNeg + return scanBeginLiteral + case '0': // beginning of 0.123 + s.step = state0 + return scanBeginLiteral + case 't': // beginning of true + s.step = stateT + return scanBeginLiteral + case 'f': // beginning of false + s.step = stateF + return scanBeginLiteral + case 'n': // beginning of null + s.step = stateN + return scanBeginLiteral + } + if '1' <= c && c <= '9' { // beginning of 1234.5 + s.step = state1 + return scanBeginLiteral + } + return s.error(c, "looking for beginning of value") +} + +// stateBeginStringOrEmpty is the state after reading `{`. +func stateBeginStringOrEmpty(s *scanner, c byte) scanStatus { + if c <= ' ' && isSpace(c) { + return scanSkipSpace + } + if c == '}' { + n := len(s.contexts) + s.contexts[n-1] = contextObj + return stateEndValue(s, c) + } + return stateBeginString(s, c) +} + +// stateBeginString is the state after reading `{"key": value,`. +func stateBeginString(s *scanner, c byte) scanStatus { + if c <= ' ' && isSpace(c) { + return scanSkipSpace + } + if c == '"' { + s.step = stateInString + return scanBeginLiteral + } + return s.error(c, "looking for beginning of object key string") +} + +// stateEndValue is the state after completing a value, +// such as after reading `{}` or `true` or `["x"`. 
+func stateEndValue(s *scanner, c byte) scanStatus { + n := len(s.contexts) + if n == 0 { + // Completed top-level before the current byte. + s.step = stateEndTop + s.endTop = true + return stateEndTop(s, c) + } + if c <= ' ' && isSpace(c) { + s.step = stateEndValue + return scanSkipSpace + } + ps := s.contexts[n-1] + switch ps { + case contextKey: + if c == ':' { + s.contexts[n-1] = contextObj + s.step = stateBeginValue + return scanObjectKey + } + return s.error(c, "after object key") + case contextObj: + if c == ',' { + s.contexts[n-1] = contextKey + s.step = stateBeginString + return scanObjectValue + } + if c == '}' { + s.popParseState() + return scanEndObject + } + return s.error(c, "after object key:value pair") + case contextArr: + if c == ',' { + s.step = stateBeginValue + return scanArrayValue + } + if c == ']' { + s.popParseState() + return scanEndArray + } + return s.error(c, "after array element") + } + return s.error(c, "") +} + +// stateEndTop is the state after finishing the top-level value, +// such as after reading `{}` or `[1,2,3]`. +// Only space characters should be seen now. +func stateEndTop(s *scanner, c byte) scanStatus { + if c != ' ' && c != '\t' && c != '\r' && c != '\n' { + // Complain about non-space byte on next call. + s.error(c, "after top-level value") + } + return scanEnd +} + +// stateInString is the state after reading `"`. +func stateInString(s *scanner, c byte) scanStatus { + if c == '"' { + s.step = stateEndValue + return scanContinue + } + if c == '\\' { + s.step = stateInStringEsc + return scanContinue + } + if c < 0x20 { + return s.error(c, "in string literal") + } + return scanContinue +} + +// stateInStringEsc is the state after reading `"\` during a quoted string. 
+func stateInStringEsc(s *scanner, c byte) scanStatus { + switch c { + case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': + s.step = stateInString + return scanContinue + case 'u': + s.step = stateInStringEscU + return scanContinue + } + return s.error(c, "in string escape code") +} + +// stateInStringEscU is the state after reading `"\u` during a quoted string. +func stateInStringEscU(s *scanner, c byte) scanStatus { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU1 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU1 is the state after reading `"\u1` during a quoted string. +func stateInStringEscU1(s *scanner, c byte) scanStatus { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU12 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU12 is the state after reading `"\u12` during a quoted string. +func stateInStringEscU12(s *scanner, c byte) scanStatus { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU123 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU123 is the state after reading `"\u123` during a quoted string. +func stateInStringEscU123(s *scanner, c byte) scanStatus { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInString + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateNeg is the state after reading `-` during a number. 
+func stateNeg(s *scanner, c byte) scanStatus { + if c == '0' { + s.step = state0 + return scanContinue + } + if '1' <= c && c <= '9' { + s.step = state1 + return scanContinue + } + return s.error(c, "in numeric literal") +} + +// state1 is the state after reading a non-zero integer during a number, +// such as after reading `1` or `100` but not `0`. +func state1(s *scanner, c byte) scanStatus { + if '0' <= c && c <= '9' { + s.step = state1 + return scanContinue + } + return state0(s, c) +} + +// state0 is the state after reading `0` during a number. +func state0(s *scanner, c byte) scanStatus { + if c == '.' { + s.step = stateDot + return scanContinue + } + if c == 'e' || c == 'E' { + s.step = stateE + return scanContinue + } + return stateEndValue(s, c) +} + +// stateDot is the state after reading the integer and decimal point in a number, +// such as after reading `1.`. +func stateDot(s *scanner, c byte) scanStatus { + if '0' <= c && c <= '9' { + s.step = stateDot0 + return scanContinue + } + return s.error(c, "after decimal point in numeric literal") +} + +// stateDot0 is the state after reading the integer, decimal point, and subsequent +// digits of a number, such as after reading `3.14`. +func stateDot0(s *scanner, c byte) scanStatus { + if '0' <= c && c <= '9' { + return scanContinue + } + if c == 'e' || c == 'E' { + s.step = stateE + return scanContinue + } + return stateEndValue(s, c) +} + +// stateE is the state after reading the mantissa and e in a number, +// such as after reading `314e` or `0.314e`. +func stateE(s *scanner, c byte) scanStatus { + if c == '+' || c == '-' { + s.step = stateESign + return scanContinue + } + return stateESign(s, c) +} + +// stateESign is the state after reading the mantissa, e, and sign in a number, +// such as after reading `314e-` or `0.314e+`. 
+func stateESign(s *scanner, c byte) scanStatus { + if '0' <= c && c <= '9' { + s.step = stateE0 + return scanContinue + } + return s.error(c, "in exponent of numeric literal") +} + +// stateE0 is the state after reading the mantissa, e, optional sign, +// and at least one digit of the exponent in a number, +// such as after reading `314e-2` or `0.314e+1` or `3.14e0`. +func stateE0(s *scanner, c byte) scanStatus { + if '0' <= c && c <= '9' { + return scanContinue + } + return stateEndValue(s, c) +} + +// stateT is the state after reading `t`. +func stateT(s *scanner, c byte) scanStatus { + if c == 'r' { + s.step = stateTr + return scanContinue + } + return s.error(c, "in literal true (expecting 'r')") +} + +// stateTr is the state after reading `tr`. +func stateTr(s *scanner, c byte) scanStatus { + if c == 'u' { + s.step = stateTru + return scanContinue + } + return s.error(c, "in literal true (expecting 'u')") +} + +// stateTru is the state after reading `tru`. +func stateTru(s *scanner, c byte) scanStatus { + if c == 'e' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal true (expecting 'e')") +} + +// stateF is the state after reading `f`. +func stateF(s *scanner, c byte) scanStatus { + if c == 'a' { + s.step = stateFa + return scanContinue + } + return s.error(c, "in literal false (expecting 'a')") +} + +// stateFa is the state after reading `fa`. +func stateFa(s *scanner, c byte) scanStatus { + if c == 'l' { + s.step = stateFal + return scanContinue + } + return s.error(c, "in literal false (expecting 'l')") +} + +// stateFal is the state after reading `fal`. +func stateFal(s *scanner, c byte) scanStatus { + if c == 's' { + s.step = stateFals + return scanContinue + } + return s.error(c, "in literal false (expecting 's')") +} + +// stateFals is the state after reading `fals`. 
+func stateFals(s *scanner, c byte) scanStatus { + if c == 'e' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal false (expecting 'e')") +} + +// stateN is the state after reading `n`. +func stateN(s *scanner, c byte) scanStatus { + if c == 'u' { + s.step = stateNu + return scanContinue + } + return s.error(c, "in literal null (expecting 'u')") +} + +// stateNu is the state after reading `nu`. +func stateNu(s *scanner, c byte) scanStatus { + if c == 'l' { + s.step = stateNul + return scanContinue + } + return s.error(c, "in literal null (expecting 'l')") +} + +// stateNul is the state after reading `nul`. +func stateNul(s *scanner, c byte) scanStatus { + if c == 'l' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal null (expecting 'l')") +} + +// stateError is the state after reaching a syntax error, +// such as after reading `[1}` or `5.1.2`. +func stateError(s *scanner, c byte) scanStatus { + return scanError +} + +// error records an error and switches to the error state. +func (s *scanner) error(c byte, context string) scanStatus { + s.step = stateError + s.err = fmt.Errorf("invalid character <<%c>> %s", c, context) + return scanError +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/json/json_test.go b/vendor/github.com/gabriel-vasile/mimetype/internal/json/json_test.go new file mode 100644 index 000000000..1e9c659fb --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/json/json_test.go @@ -0,0 +1,39 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package json + +import "testing" + +var scanTests = []struct { + data string + length int + ok bool +}{ + {`foo`, 2, false}, + {`}{`, 1, false}, + {`{]`, 2, false}, + {`{}`, 2, true}, + {`{"foo":"bar"}`, 13, true}, + {`{"foo":"bar","bar":{"baz":["qux"]}`, 34, false}, + {`{"foo":"bar","bar":{"baz":["qux"]}}`, 35, true}, +} + +func TestScan(t *testing.T) { + for _, st := range scanTests { + scanned, err := Scan([]byte(st.data)) + if scanned != st.length { + t.Errorf("Scan length error: expected: %d; got: %d; input: %s", + st.length, scanned, st.data) + } + + if err != nil && st.ok { + t.Errorf("Scan failed with err: %s; input: %s", err, st.data) + } + + if err == nil && !st.ok { + t.Errorf("Scan should fail for input: %s", st.data) + } + } +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/archive.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/archive.go new file mode 100644 index 000000000..6dfd59df0 --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/archive.go @@ -0,0 +1,70 @@ +package matchers + +import "bytes" + +// Zip matches a zip archive. +func Zip(in []byte) bool { + return len(in) > 3 && + in[0] == 0x50 && in[1] == 0x4B && + (in[2] == 0x3 || in[2] == 0x5 || in[2] == 0x7) && + (in[3] == 0x4 || in[3] == 0x6 || in[3] == 0x8) +} + +// SevenZ matches a 7z archive. +func SevenZ(in []byte) bool { + return len(in) > 6 && + bytes.Equal(in[:6], []byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C}) +} + +// Epub matches an EPUB file. +func Epub(in []byte) bool { + return len(in) > 58 && bytes.Equal(in[30:58], []byte("mimetypeapplication/epub+zip")) +} + +// Jar matches a Java archive file. +func Jar(in []byte) bool { + return bytes.Contains(in, []byte("META-INF/MANIFEST.MF")) +} + +// Gzip matched gzip files based on http://www.zlib.org/rfc-gzip.html#header-trailer. 
+func Gzip(in []byte) bool { + return len(in) > 2 && bytes.Equal(in[:2], []byte{0x1f, 0x8b}) +} + +// Crx matches a Chrome extension file: a zip archive prepended by "Cr24". +func Crx(in []byte) bool { + return len(in) > 4 && bytes.Equal(in[:4], []byte("Cr24")) +} + +// Tar matches a (t)ape (ar)chive file. +func Tar(in []byte) bool { + return len(in) > 262 && bytes.Equal(in[257:262], []byte("ustar")) +} + +// Fits matches an Flexible Image Transport System file. +func Fits(in []byte) bool { + return bytes.HasPrefix(in, []byte{0x53, 0x49, 0x4D, 0x50, 0x4C, 0x45, 0x20, + 0x20, 0x3D, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x54}) +} + +// Xar matches an eXtensible ARchive format file. +func Xar(in []byte) bool { + return bytes.HasPrefix(in, []byte{0x78, 0x61, 0x72, 0x21}) +} + +// Bz2 matches a bzip2 file. +func Bz2(in []byte) bool { + return bytes.HasPrefix(in, []byte{0x42, 0x5A, 0x68}) +} + +// Ar matches an ar (Unix) archive file. +func Ar(in []byte) bool { + return bytes.HasPrefix(in, []byte{0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E}) +} + +// Deb matches a Debian package file +func Deb(in []byte) bool { + return len(in) > 8 && bytes.HasPrefix(in[8:], []byte{0x64, 0x65, 0x62, 0x69, + 0x61, 0x6E, 0x2D, 0x62, 0x69, 0x6E, 0x61, 0x72, 0x79}) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/audio.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/audio.go new file mode 100644 index 000000000..32f7440a5 --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/audio.go @@ -0,0 +1,59 @@ +package matchers + +import ( + "bytes" +) + +// Mp3 matches an mp3 file. +func Mp3(in []byte) bool { + return bytes.HasPrefix(in, []byte("\x49\x44\x33")) +} + +// Flac matches a Free Lossless Audio Codec file. 
+func Flac(in []byte) bool { + return bytes.HasPrefix(in, []byte("\x66\x4C\x61\x43\x00\x00\x00\x22")) +} + +// Midi matches a Musical Instrument Digital Interface file. +func Midi(in []byte) bool { + return bytes.HasPrefix(in, []byte("\x4D\x54\x68\x64")) +} + +// Ape matches a Monkey's Audio file. +func Ape(in []byte) bool { + return bytes.HasPrefix(in, []byte("\x4D\x41\x43\x20\x96\x0F\x00\x00\x34\x00\x00\x00\x18\x00\x00\x00\x90\xE3")) +} + +// MusePack matches a Musepack file. +func MusePack(in []byte) bool { + return len(in) > 4 && bytes.Equal(in[:4], []byte("MPCK")) +} + +// Wav matches a Waveform Audio File Format file. +func Wav(in []byte) bool { + return len(in) > 12 && + bytes.Equal(in[:4], []byte("\x52\x49\x46\x46")) && + bytes.Equal(in[8:12], []byte("\x57\x41\x56\x45")) +} + +// Aiff matches Audio Interchange File Format file. +func Aiff(in []byte) bool { + return len(in) > 12 && + bytes.Equal(in[:4], []byte("\x46\x4F\x52\x4D")) && + bytes.Equal(in[8:12], []byte("\x41\x49\x46\x46")) +} + +// Ogg matches an Ogg file. +func Ogg(in []byte) bool { + return len(in) > 5 && bytes.Equal(in[:5], []byte("\x4F\x67\x67\x53\x00")) +} + +// Au matches a Sun Microsystems au file. +func Au(in []byte) bool { + return len(in) > 4 && bytes.Equal(in[:4], []byte("\x2E\x73\x6E\x64")) +} + +// Amr matches an Adaptive Multi-Rate file. +func Amr(in []byte) bool { + return len(in) > 5 && bytes.Equal(in[:5], []byte("\x23\x21\x41\x4D\x52")) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/binary.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/binary.go new file mode 100644 index 000000000..eb864b18a --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/binary.go @@ -0,0 +1,85 @@ +package matchers + +import ( + "bytes" +) + +// Class matches an java class file. +func Class(in []byte) bool { + return len(in) > 4 && bytes.Equal(in[:4], []byte{0xCA, 0xFE, 0xBA, 0xBE}) +} + +// Swf matches an Adobe Flash swf file. 
func Swf(in []byte) bool {
	// All three SWF signatures are three bytes long. HasPrefix rejects
	// inputs shorter than the signature, so short inputs are never sliced
	// out of range, and a bare 3-byte "CWS" is now accepted just as "FWS"
	// and "ZWS" are.
	return bytes.HasPrefix(in, []byte("CWS")) ||
		bytes.HasPrefix(in, []byte("FWS")) ||
		bytes.HasPrefix(in, []byte("ZWS"))
}

// Wasm matches a WebAssembly binary file: the 4-byte magic "\x00asm"
// followed by at least one more byte.
func Wasm(in []byte) bool {
	return len(in) > 4 && bytes.Equal(in[:4], []byte{0x00, 0x61, 0x73, 0x6D})
}

// Dbf matches a dBase file.
// https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
//
// The check needs the first four header bytes, so shorter inputs never
// match.
func Dbf(in []byte) bool {
	if len(in) < 4 {
		return false
	}

	// 3rd and 4th bytes contain the last update month and day of month.
	month, day := in[2], in[3]
	if month < 1 || month > 12 || day < 1 || day > 31 {
		return false
	}

	// The dbf type is dictated by the first byte.
	switch in[0] {
	case 0x02, 0x03, 0x04, 0x05, 0x30, 0x31, 0x32, 0x42, 0x62, 0x7B, 0x82,
		0x83, 0x87, 0x8A, 0x8B, 0x8E, 0xB3, 0xCB, 0xE5, 0xF5, 0xF4, 0xFB:
		return true
	}
	return false
}

// Exe matches a Windows/DOS executable file (leading "MZ").
func Exe(in []byte) bool {
	return bytes.HasPrefix(in, []byte{0x4D, 0x5A})
}

// Elf matches an Executable and Linkable Format file (leading "\x7fELF").
func Elf(in []byte) bool {
	return bytes.HasPrefix(in, []byte{0x7F, 0x45, 0x4C, 0x46})
}

// ElfObj matches an object file: bytes 16-17 hold the value 1 in either
// byte order. It inspects only those two bytes, not the ELF magic.
func ElfObj(in []byte) bool {
	return len(in) > 17 && ((in[16] == 0x01 && in[17] == 0x00) ||
		(in[16] == 0x00 && in[17] == 0x01))
}

// ElfExe matches an executable file: bytes 16-17 hold the value 2 in either
// byte order. It inspects only those two bytes, not the ELF magic.
func ElfExe(in []byte) bool {
	return len(in) > 17 && ((in[16] == 0x02 && in[17] == 0x00) ||
		(in[16] == 0x00 && in[17] == 0x02))
}

// ElfLib matches a shared library file: bytes 16-17 hold the value 3 in
// either byte order. It inspects only those two bytes, not the ELF magic.
func ElfLib(in []byte) bool {
	return len(in) > 17 && ((in[16] == 0x03 && in[17] == 0x00) ||
		(in[16] == 0x00 && in[17] == 0x03))
}

// ElfDump matches a core dump file: bytes 16-17 hold the value 4 in either
// byte order. It inspects only those two bytes, not the ELF magic.
func ElfDump(in []byte) bool {
	return len(in) > 17 && ((in[16] == 0x04 && in[17] == 0x00) ||
		(in[16] == 0x00 && in[17] == 0x04))
}

// Dcm matches a DICOM medical format file.
+func Dcm(in []byte) bool { + return len(in) > 131 && + bytes.Equal(in[128:132], []byte{0x44, 0x49, 0x43, 0x4D}) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/document.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/document.go new file mode 100644 index 000000000..cfa82671c --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/document.go @@ -0,0 +1,8 @@ +package matchers + +import "bytes" + +// Pdf matches a Portable Document Format file. +func Pdf(in []byte) bool { + return len(in) > 4 && bytes.Equal(in[:4], []byte{0x25, 0x50, 0x44, 0x46}) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/fonts.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/fonts.go new file mode 100644 index 000000000..d5f62a793 --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/fonts.go @@ -0,0 +1,13 @@ +package matchers + +import "bytes" + +// Woff matches a Web Open Font Format file. +func Woff(in []byte) bool { + return len(in) > 4 && bytes.Equal(in[:4], []byte("wOFF")) +} + +// Woff2 matches a Web Open Font Format version 2 file. +func Woff2(in []byte) bool { + return len(in) > 4 && bytes.Equal(in[:4], []byte("wOF2")) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/geo.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/geo.go new file mode 100644 index 000000000..4c82b5301 --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/geo.go @@ -0,0 +1,44 @@ +package matchers + +import ( + "bytes" + "encoding/binary" +) + +// Shp matches a shape format file. 
+// https://www.esri.com/library/whitepapers/pdfs/shapefile.pdf +func Shp(in []byte) bool { + if len(in) < 112 { + return false + } + shapeTypes := []int{ + 0, // Null shape + 1, // Point + 3, // Polyline + 5, // Polygon + 8, // MultiPoint + 11, // PointZ + 13, // PolylineZ + 15, // PolygonZ + 18, // MultiPointZ + 21, // PointM + 23, // PolylineM + 25, // PolygonM + 28, // MultiPointM + 31, // MultiPatch + } + + for _, st := range shapeTypes { + if st == int(binary.LittleEndian.Uint32(in[108:112])) { + return true + } + } + + return false +} + +// Shx matches a shape index format file. +// https://www.esri.com/library/whitepapers/pdfs/shapefile.pdf +func Shx(in []byte) bool { + return len(in) > 4 && bytes.Equal(in[:4], []byte{0x00, 0x00, 0x27, 0x0A}) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/image.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/image.go new file mode 100644 index 000000000..df9c0c31d --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/image.go @@ -0,0 +1,56 @@ +package matchers + +import "bytes" + +// Png matches a Portable Network Graphics file. +func Png(in []byte) bool { + return len(in) > 8 && + bytes.Equal(in[:8], []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}) +} + +// Jpg matches a Joint Photographic Experts Group file. +func Jpg(in []byte) bool { + return len(in) > 3 && bytes.Equal(in[:3], []byte{0xFF, 0xD8, 0xFF}) +} + +// Gif matches a Graphics Interchange Format file. +func Gif(in []byte) bool { + return bytes.HasPrefix(in, []byte("GIF87a")) || + bytes.HasPrefix(in, []byte("GIF89a")) +} + +// Webp matches a WebP file. +func Webp(in []byte) bool { + return len(in) > 12 && + bytes.Equal(in[0:4], []byte{0x52, 0x49, 0x46, 0x46}) && + bytes.Equal(in[8:12], []byte{0x57, 0x45, 0x42, 0x50}) +} + +// Bmp matches a bitmap image file. +func Bmp(in []byte) bool { + return len(in) > 1 && in[0] == 0x42 && in[1] == 0x4D +} + +// Ps matches a PostScript file. 
+func Ps(in []byte) bool { + return bytes.HasPrefix(in, []byte("%!PS-Adobe-")) +} + +// Psd matches a Photoshop Document file. +func Psd(in []byte) bool { + return bytes.HasPrefix(in, []byte("8BPS")) +} + +// Ico matches an ICO file. +func Ico(in []byte) bool { + return len(in) > 3 && + in[0] == 0x00 && in[1] == 0x00 && + in[2] == 0x01 && in[3] == 0x00 +} + +// Tiff matches a Tagged Image File Format file. +func Tiff(in []byte) bool { + return len(in) > 4 && + (bytes.Equal(in[:4], []byte{0x49, 0x49, 0x2A, 0x00}) || + bytes.Equal(in[:4], []byte{0x4D, 0x4D, 0x00, 0x2A})) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/matchers.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/matchers.go new file mode 100644 index 000000000..6170443a8 --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/matchers.go @@ -0,0 +1,46 @@ +// Package matchers holds the matching functions used to find mime types. +package matchers + +// ReadLimit is the maximum number of bytes read +// from the input when detecting a reader. +const ReadLimit = 2048 + +// True is a dummy matching function used to match any input. +func True([]byte) bool { + return true +} + +// False is a dummy matching function used to never match input. +func False([]byte) bool { + return false +} + +// trimLWS trims whitespace from beginning of the input. +func trimLWS(in []byte) []byte { + firstNonWS := 0 + for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ { + } + + return in[firstNonWS:] +} + +// trimRWS trims whitespace from the end of the input. 
+func trimRWS(in []byte) []byte { + lastNonWS := len(in) - 1 + for ; lastNonWS > 0 && isWS(in[lastNonWS]); lastNonWS-- { + } + + return in[:lastNonWS+1] +} + +func firstLine(in []byte) []byte { + lineEnd := 0 + for ; lineEnd < len(in) && in[lineEnd] != '\n'; lineEnd++ { + } + + return in[:lineEnd] +} + +func isWS(b byte) bool { + return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' ' +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/ms_office.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/ms_office.go new file mode 100644 index 000000000..7c0ce1d85 --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/ms_office.go @@ -0,0 +1,80 @@ +package matchers + +import ( + "bytes" + "fmt" + "strings" +) + +// Xlsx matches a Microsoft Excel 2007 file. +func Xlsx(in []byte) bool { + return bytes.Contains(in, []byte("xl/")) +} + +// Docx matches a Microsoft Office 2007 file. +func Docx(in []byte) bool { + return bytes.Contains(in, []byte("word/")) +} + +// Pptx matches a Microsoft PowerPoint 2007 file. +func Pptx(in []byte) bool { + return bytes.Contains(in, []byte("ppt/")) +} + +// Doc matches a Microsoft Office 97-2003 file. +func Doc(in []byte) bool { + if len(in) < 516 { + return false + } + + head := fmt.Sprintf("%X", in[:8]) + offset512 := fmt.Sprintf("%X", in[512:516]) + + return head == "D0CF11E0A1B11AE1" && offset512 == "ECA5C100" +} + +// Ppt matches a Microsoft PowerPoint 97-2003 file. +func Ppt(in []byte) bool { + if len(in) < 520 { + return false + } + + if fmt.Sprintf("%X", in[:8]) == "D0CF11E0A1B11AE1" { + offset512 := fmt.Sprintf("%X", in[512:516]) + if offset512 == "A0461DF0" || offset512 == "006E1EF0" || offset512 == "0F00E803" { + return true + } + if offset512 == "FDFFFFFF" && fmt.Sprintf("%x", in[518:520]) == "0000" { + return true + } + } + + return false +} + +// Xls matches a Microsoft Excel 97-2003 file. 
+func Xls(in []byte) bool { + if len(in) < 520 { + return false + } + + if fmt.Sprintf("%X", in[:8]) == "D0CF11E0A1B11AE1" { + offset512 := fmt.Sprintf("%X", in[512:520]) + subheaders := []string{ + "0908100000060500", + "FDFFFFFF10", + "FDFFFFFF1F", + "FDFFFFFF22", + "FDFFFFFF23", + "FDFFFFFF28", + "FDFFFFFF29", + } + for _, h := range subheaders { + if strings.HasPrefix(offset512, h) { + return true + } + } + } + + return false +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/odf.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/odf.go new file mode 100644 index 000000000..ad31e666b --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/odf.go @@ -0,0 +1,48 @@ +package matchers + +import "bytes" + +// Odt matches an OpenDocument Text file. +func Odt(in []byte) bool { + return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.text")) +} + +// Ott matches an OpenDocument Text Template file. +func Ott(in []byte) bool { + return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.text-template")) +} + +// Ods matches an OpenDocument Spreadsheet file. +func Ods(in []byte) bool { + return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet")) +} + +// Ots matches an OpenDocument Spreadsheet Template file. +func Ots(in []byte) bool { + return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet-template")) +} + +// Odp matches an OpenDocument Presentation file. +func Odp(in []byte) bool { + return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.presentation")) +} + +// Otp matches an OpenDocument Presentation Template file. +func Otp(in []byte) bool { + return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.presentation-template")) +} + +// Odg matches an OpenDocument Drawing file. 
+func Odg(in []byte) bool { + return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.graphics")) +} + +// Otg matches an OpenDocument Drawing Template file. +func Otg(in []byte) bool { + return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.graphics-template")) +} + +// Odf matches an OpenDocument Formula file. +func Odf(in []byte) bool { + return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.formula")) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/signature.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/signature.go new file mode 100644 index 000000000..a3364de40 --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/signature.go @@ -0,0 +1,125 @@ +package matchers + +import ( + "bytes" + "fmt" +) + +type ( + markupSig []byte + ciSig []byte // case insensitive signature + shebangSig []byte // matches !# followed by the signature + ftypSig []byte // matches audio/video files. www.ftyps.com + xmlSig struct { + // the local name of the root tag + localName []byte + // the namespace of the XML document + xmlns []byte + } + sig interface { + detect([]byte) bool + } +) + +func newXmlSig(localName, xmlns string) xmlSig { + ret := xmlSig{xmlns: []byte(xmlns)} + if localName != "" { + ret.localName = []byte(fmt.Sprintf("<%s", localName)) + } + + return ret +} + +// Implement sig interface. +func (hSig markupSig) detect(in []byte) bool { + if len(in) < len(hSig)+1 { + return false + } + + // perform case insensitive check + for i, b := range hSig { + db := in[i] + if 'A' <= b && b <= 'Z' { + db &= 0xDF + } + if b != db { + return false + } + } + // Next byte must be space or right angle bracket. + if db := in[len(hSig)]; db != ' ' && db != '>' { + return false + } + + return true +} + +// Implement sig interface. 
+func (tSig ciSig) detect(in []byte) bool { + if len(in) < len(tSig)+1 { + return false + } + + // perform case insensitive check + for i, b := range tSig { + db := in[i] + if 'A' <= b && b <= 'Z' { + db &= 0xDF + } + if b != db { + return false + } + } + + return true +} + +// a valid shebang starts with the "#!" characters +// followed by any number of spaces +// followed by the path to the interpreter and optionally, the args for the interpreter +func (sSig shebangSig) detect(in []byte) bool { + in = firstLine(in) + + if len(in) < len(sSig)+2 { + return false + } + if in[0] != '#' || in[1] != '!' { + return false + } + + in = trimLWS(trimRWS(in[2:])) + + return bytes.Equal(in, sSig) +} + +// Implement sig interface. +func (fSig ftypSig) detect(in []byte) bool { + return len(in) > 12 && + bytes.Equal(in[4:8], []byte("ftyp")) && + bytes.Equal(in[8:12], fSig) +} + +func (xSig xmlSig) detect(in []byte) bool { + l := 512 + if len(in) < l { + l = len(in) + } + in = in[:l] + + if len(xSig.localName) == 0 { + return bytes.Index(in, xSig.xmlns) > 0 + } + + localNameIndex := bytes.Index(in, xSig.localName) + return localNameIndex != -1 && localNameIndex < bytes.Index(in, xSig.xmlns) +} + +func detect(in []byte, sigs []sig) bool { + for _, sig := range sigs { + if sig.detect(in) { + return true + } + } + + return false +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/text.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/text.go new file mode 100644 index 000000000..9af1b9132 --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/text.go @@ -0,0 +1,255 @@ +package matchers + +import ( + "bytes" + + "github.com/gabriel-vasile/mimetype/internal/json" +) + +var ( + htmlSigs = []sig{ + markupSig(" 6 && bytes.Equal(in[:6], []byte("{\\rtf1")) +} + +// Svg matches a SVG file. 
+func Svg(in []byte) bool { + return bytes.Contains(in, []byte(" 1 && len(lines) > 1 +} + +// butLastLineReader returns a reader to the provided byte slice. +// the reader is guaranteed to reach EOF before it reads `cutAt` bytes. +// bytes after the last newline are dropped from the input. +func butLastLineReader(in []byte, cutAt int) io.Reader { + if len(in) >= cutAt { + for i := cutAt - 1; i > 0; i-- { + if in[i] == '\n' { + return bytes.NewReader(in[:i]) + } + } + + // no newline was found between the 0 index and cutAt + return bytes.NewReader(in[:cutAt]) + } + + return bytes.NewReader(in) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/video.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/video.go new file mode 100644 index 000000000..4eb08310f --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/video.go @@ -0,0 +1,70 @@ +package matchers + +import ( + "bytes" +) + +// WebM matches a WebM file. +func WebM(in []byte) bool { + return isMatroskaFileTypeMatched(in, "webm") +} + +// Mkv matches a mkv file. +func Mkv(in []byte) bool { + return isMatroskaFileTypeMatched(in, "matroska") +} + +// isMatroskaFileTypeMatched is used for webm and mkv file matching. +// It checks for .Eߣ sequence. If the sequence is found, +// then it means it is Matroska media container, including WebM. +// Then it verifies which of the file type it is representing by matching the +// file specific string. +func isMatroskaFileTypeMatched(in []byte, flType string) bool { + if bytes.HasPrefix(in, []byte("\x1A\x45\xDF\xA3")) { + return isFileTypeNamePresent(in, flType) + } + return false +} + +// isFileTypeNamePresent accepts the matroska input data stream and searches +// for the given file type in the stream. Return whether a match is found. +// The logic of search is: find first instance of \x42\x82 and then +// search for given string after one byte of above instance. 
+func isFileTypeNamePresent(in []byte, flType string) bool { + var ind int + if len(in) >= 4096 { // restricting length to 4096 + ind = bytes.Index(in[0:4096], []byte("\x42\x82")) + } else { + ind = bytes.Index(in, []byte("\x42\x82")) + } + if ind > 0 { + // filetype name will be present exactly + // one byte after the match of the two bytes "\x42\x82" + return bytes.HasPrefix(in[ind+3:], []byte(flType)) + } + return false +} + +// Flv matches a Flash video file. +func Flv(in []byte) bool { + return bytes.HasPrefix(in, []byte("\x46\x4C\x56\x01")) +} + +// Mpeg matches a Moving Picture Experts Group file. +func Mpeg(in []byte) bool { + return len(in) > 3 && bytes.Equal(in[:3], []byte("\x00\x00\x01")) && + in[3] >= 0xB0 && in[3] <= 0xBF +} + +// Avi matches an Audio Video Interleaved file. +func Avi(in []byte) bool { + return len(in) > 16 && + bytes.Equal(in[:4], []byte("RIFF")) && + bytes.Equal(in[8:16], []byte("AVI LIST")) +} + +// Asf matches an Advanced Systems Format file. +func Asf(in []byte) bool { + return len(in) > 16 && bytes.Equal(in[:16], []byte{0x30, 0x26, 0xB2, 0x75, + 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9, 0x00, 0xAA, 0x00, 0x62, 0xCE, 0x6C}) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/video_ftyp.go b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/video_ftyp.go new file mode 100644 index 000000000..0d470cd3c --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/matchers/video_ftyp.go @@ -0,0 +1,70 @@ +package matchers + +var ( + mp4Sigs = []sig{ + ftypSig("avc1"), ftypSig("dash"), ftypSig("iso2"), ftypSig("iso3"), + ftypSig("iso4"), ftypSig("iso5"), ftypSig("iso6"), ftypSig("isom"), + ftypSig("mmp4"), ftypSig("mp41"), ftypSig("mp42"), ftypSig("mp4v"), + ftypSig("mp71"), ftypSig("MSNV"), ftypSig("NDAS"), ftypSig("NDSC"), + ftypSig("NSDC"), ftypSig("NSDH"), ftypSig("NDSM"), ftypSig("NDSP"), + ftypSig("NDSS"), ftypSig("NDXC"), ftypSig("NDXH"), ftypSig("NDXM"), + ftypSig("NDXP"), ftypSig("NDXS"), 
ftypSig("F4V "), ftypSig("F4P "), + } + threeGPSigs = []sig{ + ftypSig("3gp1"), ftypSig("3gp2"), ftypSig("3gp3"), ftypSig("3gp4"), + ftypSig("3gp5"), ftypSig("3gp6"), ftypSig("3gs7"), ftypSig("3ge6"), + ftypSig("3ge7"), ftypSig("3gg6"), + } + threeG2Sigs = []sig{ + ftypSig("3g2a"), ftypSig("3g2b"), ftypSig("3g2c"), ftypSig("KDDI"), + } + amp4Sigs = []sig{ + // audio for Adobe Flash Player 9+ + ftypSig("F4A "), ftypSig("F4B "), + // Apple iTunes AAC-LC (.M4A) Audio + ftypSig("M4B "), ftypSig("M4P "), + // MPEG-4 (.MP4) for SonyPSP + ftypSig("MSNV"), + // Nero Digital AAC Audio + ftypSig("NDAS"), + } + qtSigs = []sig{ftypSig("qt "), ftypSig("moov")} + mqvSigs = []sig{ftypSig("mqt ")} + m4aSigs = []sig{ftypSig("M4A ")} + // TODO: add support for remaining video formats at ftyps.com. +) + +// Mp4 matches an MP4 file. +func Mp4(in []byte) bool { + return detect(in, mp4Sigs) +} + +// ThreeGP matches a 3GPP file. +func ThreeGP(in []byte) bool { + return detect(in, threeGPSigs) +} + +// ThreeG2 matches a 3GPP2 file. +func ThreeG2(in []byte) bool { + return detect(in, threeG2Sigs) +} + +// AMp4 matches an audio MP4 file. +func AMp4(in []byte) bool { + return detect(in, amp4Sigs) +} + +// QuickTime matches a QuickTime File Format file. +func QuickTime(in []byte) bool { + return detect(in, qtSigs) +} + +// Mqv matches a Sony / Mobile QuickTime file. +func Mqv(in []byte) bool { + return detect(in, mqvSigs) +} + +// M4a matches an audio M4A file. +func M4a(in []byte) bool { + return detect(in, m4aSigs) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/mime.go b/vendor/github.com/gabriel-vasile/mimetype/mime.go new file mode 100644 index 000000000..5992a2c4e --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/mime.go @@ -0,0 +1,55 @@ +// Package mimetype uses magic number signatures +// to detect the MIME type and extension of a file. 
+package mimetype + +import ( + "io" + "os" + + "github.com/gabriel-vasile/mimetype/internal/matchers" +) + +// Detect returns the MIME type and extension of the provided byte slice. +// +// mime is always a valid MIME type, with application/octet-stream as fallback. +// extension is empty string if detected file format does not have an extension. +func Detect(in []byte) (mime, extension string) { + if len(in) == 0 { + return "inode/x-empty", "" + } + n := root.match(in, root) + return n.mime, n.extension +} + +// DetectReader returns the MIME type and extension +// of the byte slice read from the provided reader. +// +// mime is always a valid MIME type, with application/octet-stream as fallback. +// extension is empty string if detection failed with an error or +// detected file format does not have an extension. +func DetectReader(r io.Reader) (mime, extension string, err error) { + in := make([]byte, matchers.ReadLimit) + n, err := r.Read(in) + if err != nil && err != io.EOF { + return root.mime, root.extension, err + } + in = in[:n] + + mime, extension = Detect(in) + return mime, extension, nil +} + +// DetectFile returns the MIME type and extension of the provided file. +// +// mime is always a valid MIME type, with application/octet-stream as fallback. +// extension is empty string if detection failed with an error or +// detected file format does not have an extension. 
+func DetectFile(file string) (mime, extension string, err error) { + f, err := os.Open(file) + if err != nil { + return root.mime, root.extension, err + } + defer f.Close() + + return DetectReader(f) +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/mime_test.go b/vendor/github.com/gabriel-vasile/mimetype/mime_test.go new file mode 100644 index 000000000..76a169cdf --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/mime_test.go @@ -0,0 +1,232 @@ +package mimetype + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/gabriel-vasile/mimetype/internal/matchers" +) + +const testDataDir = "testdata" + +var files = map[string]*node{ + // archives + "pdf.pdf": pdf, + "zip.zip": zip, + "tar.tar": tar, + "xls.xls": xls, + "xlsx.xlsx": xlsx, + "doc.doc": doc, + "docx.docx": docx, + "docx.1.docx": docx, + "ppt.ppt": ppt, + "pptx.pptx": pptx, + "odt.odt": odt, + "ott.ott": ott, + "ods.ods": ods, + "ots.ots": ots, + "odp.odp": odp, + "otp.otp": otp, + "odg.odg": odg, + "otg.otg": otg, + "odf.odf": odf, + "epub.epub": epub, + "7z.7z": sevenZ, + "jar.jar": jar, + "gz.gz": gzip, + "fits.fits": fits, + "xar.xar": xar, + "bz2.bz2": bz2, + "a.a": ar, + "deb.deb": deb, + + // images + "png.png": png, + "jpg.jpg": jpg, + "psd.psd": psd, + "webp.webp": webp, + "tif.tif": tiff, + "ico.ico": ico, + "bmp.bmp": bmp, + + // video + "mp4.mp4": mp4, + "mp4.1.mp4": mp4, + "webm.webm": webM, + "3gp.3gp": threeGP, + "3g2.3g2": threeG2, + "flv.flv": flv, + "avi.avi": avi, + "mov.mov": quickTime, + "mqv.mqv": mqv, + "mpeg.mpeg": mpeg, + "mkv.mkv": mkv, + "asf.asf": asf, + + // audio + "mp3.mp3": mp3, + "wav.wav": wav, + "flac.flac": flac, + "midi.midi": midi, + "ape.ape": ape, + "aiff.aiff": aiff, + "au.au": au, + "ogg.ogg": ogg, + "amr.amr": amr, + "mpc.mpc": musePack, + "m4a.m4a": m4a, + "m4b.m4b": aMp4, + + // source code + "html.html": html, + "svg.svg": svg, + "svg.1.svg": svg, + "txt.txt": txt, + "php.php": php, + "ps.ps": ps, + 
"json.json": json, + "geojson.geojson": geoJson, + "geojson.1.geojson": geoJson, + "csv.csv": csv, + "tsv.tsv": tsv, + "rtf.rtf": rtf, + "js.js": js, + "lua.lua": lua, + "pl.pl": perl, + "py.py": python, + "tcl.tcl": tcl, + "vCard.vCard": vCard, + + // binary + "class.class": class, + "swf.swf": swf, + "crx.crx": crx, + "wasm.wasm": wasm, + "exe.exe": exe, + "ln": elfExe, + "so.so": elfLib, + "o.o": elfObj, + "dcm.dcm": dcm, + + // fonts + "woff.woff": woff, + "woff2.woff2": woff2, + + // XML and subtypes of XML + "xml.xml": xml, + "kml.kml": kml, + "dae.dae": collada, + "gml.gml": gml, + "gpx.gpx": gpx, + "tcx.tcx": tcx, + "x3d.x3d": x3d, + + "shp.shp": shp, + "shx.shx": shx, + "dbf.dbf": dbf, +} + +func TestMatching(t *testing.T) { + errStr := "File: %s; Mime: %s != DetectedMime: %s; err: %v" + for fName, node := range files { + fileName := filepath.Join(testDataDir, fName) + f, err := os.Open(fileName) + if err != nil { + t.Fatal(err) + } + data, err := ioutil.ReadAll(f) + if err != nil { + t.Fatal(err) + } + + if dMime, _ := Detect(data); dMime != node.mime { + t.Errorf(errStr, fName, node.mime, dMime, nil) + } + + if _, err := f.Seek(0, io.SeekStart); err != nil { + t.Errorf(errStr, fName, node.mime, root.mime, err) + } + + if dMime, _, err := DetectReader(f); dMime != node.mime { + t.Errorf(errStr, fName, node.mime, dMime, err) + } + f.Close() + + if dMime, _, err := DetectFile(fileName); dMime != node.mime { + t.Errorf(errStr, fName, node.mime, dMime, err) + } + } +} + +func TestFaultyInput(t *testing.T) { + inexistent := "inexistent.file" + if _, _, err := DetectFile(inexistent); err == nil { + t.Errorf("%s should not match successfully", inexistent) + } + + f, _ := os.Open(inexistent) + if _, _, err := DetectReader(f); err == nil { + t.Errorf("%s reader should not match successfully", inexistent) + } +} + +func TestEmptyInput(t *testing.T) { + if m, _ := Detect([]byte{}); m != "inode/x-empty" { + t.Errorf("failed to detect empty file") + } +} + +func 
TestGenerateSupportedMimesFile(t *testing.T) { + f, err := os.OpenFile("supported_mimes.md", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + t.Fatal(err) + } + defer f.Close() + + nodes := root.flatten() + header := fmt.Sprintf(`## %d Supported MIME types +This file is automatically generated when running tests. Do not edit manually. + +Extension | MIME type +--------- | -------- +`, len(nodes)) + + if _, err := f.WriteString(header); err != nil { + t.Fatal(err) + } + for _, n := range nodes { + ext := n.extension + if ext == "" { + ext = "n/a" + } + str := fmt.Sprintf("**%s** | %s\n", ext, n.mime) + if _, err := f.WriteString(str); err != nil { + t.Fatal(err) + } + } +} + +func BenchmarkMatchDetect(b *testing.B) { + files := []string{"a.png", "a.jpg", "a.pdf", "a.zip", "a.docx", "a.doc"} + data, fLen := [][matchers.ReadLimit]byte{}, len(files) + for _, f := range files { + d := [matchers.ReadLimit]byte{} + + file, err := os.Open(filepath.Join(testDataDir, f)) + if err != nil { + b.Fatal(err) + } + + io.ReadFull(file, d[:]) + data = append(data, d) + } + + b.ResetTimer() + for n := 0; n < b.N; n++ { + Detect(data[n%fLen][:]) + } +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/node.go b/vendor/github.com/gabriel-vasile/mimetype/node.go new file mode 100644 index 000000000..721c76b8c --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/node.go @@ -0,0 +1,43 @@ +package mimetype + +type ( + // node represents a vertex in the matchers tree structure. + // It holds the mime type, the extension and the function + // to check whether a byte slice has the mime type. + node struct { + mime string + extension string + matchFunc func([]byte) bool + children []*node + } +) + +func newNode(mime, extension string, matchFunc func([]byte) bool, children ...*node) *node { + return &node{ + mime: mime, + extension: extension, + matchFunc: matchFunc, + children: children, + } +} + +// match does a depth-first search on the matchers tree. 
+// it returns the deepest successful matcher for which all the children fail. +func (n *node) match(in []byte, deepestMatch *node) *node { + for _, c := range n.children { + if c.matchFunc(in) { + return c.match(in, c) + } + } + + return deepestMatch +} + +func (n *node) flatten() []*node { + out := []*node{n} + for _, c := range n.children { + out = append(out, c.flatten()...) + } + + return out +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/supported_mimes.md b/vendor/github.com/gabriel-vasile/mimetype/supported_mimes.md new file mode 100644 index 000000000..62a8f99bd --- /dev/null +++ b/vendor/github.com/gabriel-vasile/mimetype/supported_mimes.md @@ -0,0 +1,103 @@ +## 98 Supported MIME types +This file is automatically generated when running tests. Do not edit manually. + +Extension | MIME type +--------- | -------- +**n/a** | application/octet-stream +**7z** | application/x-7z-compressed +**zip** | application/zip +**xlsx** | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet +**docx** | application/vnd.openxmlformats-officedocument.wordprocessingml.document +**pptx** | application/vnd.openxmlformats-officedocument.presentationml.presentation +**epub** | application/epub+zip +**jar** | application/jar +**odt** | application/vnd.oasis.opendocument.text +**ott** | application/vnd.oasis.opendocument.text-template +**ods** | application/vnd.oasis.opendocument.spreadsheet +**ots** | application/vnd.oasis.opendocument.spreadsheet-template +**odp** | application/vnd.oasis.opendocument.presentation +**otp** | application/vnd.oasis.opendocument.presentation-template +**odg** | application/vnd.oasis.opendocument.graphics +**otg** | application/vnd.oasis.opendocument.graphics-template +**odf** | application/vnd.oasis.opendocument.formula +**pdf** | application/pdf +**doc** | application/msword +**xls** | application/vnd.ms-excel +**ppt** | application/vnd.ms-powerpoint +**ps** | application/postscript +**psd** | application/x-photoshop +**ogg** 
| application/ogg +**png** | image/png +**jpg** | image/jpeg +**gif** | image/gif +**webp** | image/webp +**exe** | application/vnd.microsoft.portable-executable +**n/a** | application/x-elf +**n/a** | application/x-object +**n/a** | application/x-executable +**so** | application/x-sharedlib +**n/a** | application/x-coredump +**a** | application/x-archive +**deb** | application/vnd.debian.binary-package +**tar** | application/x-tar +**xar** | application/x-xar +**bz2** | application/x-bzip2 +**fits** | application/fits +**tiff** | image/tiff +**bmp** | image/bmp +**ico** | image/x-icon +**mp3** | audio/mpeg +**flac** | audio/flac +**midi** | audio/midi +**ape** | audio/ape +**mpc** | audio/musepack +**amr** | audio/amr +**wav** | audio/wav +**aiff** | audio/aiff +**au** | audio/basic +**mpeg** | video/mpeg +**mov** | video/quicktime +**mqv** | video/quicktime +**mp4** | video/mp4 +**webm** | video/webm +**3gp** | video/3gpp +**3g2** | video/3gpp2 +**avi** | video/x-msvideo +**flv** | video/x-flv +**mkv** | video/x-matroska +**asf** | video/x-ms-asf +**mp4** | audio/mp4 +**m4a** | audio/x-m4a +**txt** | text/plain +**html** | text/html; charset=utf-8 +**svg** | image/svg+xml +**xml** | text/xml; charset=utf-8 +**x3d** | model/x3d+xml +**kml** | application/vnd.google-earth.kml+xml +**dae** | model/vnd.collada+xml +**gml** | application/gml+xml +**gpx** | application/gpx+xml +**tcx** | application/vnd.garmin.tcx+xml +**php** | text/x-php; charset=utf-8 +**js** | application/javascript +**lua** | text/x-lua +**pl** | text/x-perl +**py** | application/x-python +**json** | application/json +**geojson** | application/geo+json +**rtf** | text/rtf +**tcl** | text/x-tcl +**csv** | text/csv +**tsv** | text/tab-separated-values +**vcf** | text/vcard +**gz** | application/gzip +**class** | application/x-java-applet; charset=binary +**swf** | application/x-shockwave-flash +**crx** | application/x-chrome-extension +**woff** | font/woff +**woff2** | font/woff2 +**wasm** | 
// root is a matcher which passes for any slice of bytes.
// When a matcher passes the check, the children matchers
// are tried in order to find a more accurate mime type.
//
// root itself carries "application/octet-stream" and an empty extension, and
// uses matchers.True as its matcher. Only the nodes listed here are direct
// children of root; every other node declared below is reachable solely
// through its parent (for example shp under shx, deb under ar, the OOXML and
// OpenDocument types under zip, and the text-based formats under txt).
//
// NOTE(review): newNode is defined elsewhere; from its call sites it appears
// to take (MIME type, file extension, matcher, children...) — confirm there.
var root = newNode("application/octet-stream", "", matchers.True,
	sevenZ, zip, pdf, doc, xls, ppt, ps, psd, ogg, png, jpg, gif, webp, exe, elf,
	ar, tar, xar, bz2, fits, tiff, bmp, ico, mp3, flac, midi, ape, musePack, amr,
	wav, aiff, au, mpeg, quickTime, mqv, mp4, webM, threeGP, threeG2, avi, flv,
	mkv, asf, aMp4, m4a, txt, gzip, class, swf, crx, woff, woff2, wasm, shx, dbf,
	dcm,
)

// The list of nodes appended to the root node
//
// Each variable is one node of the detection tree. A node that lists other
// nodes after its matcher is their parent. Some things worth knowing when
// editing this list:
//   - elf, elfObj, elfExe and elfDump are declared with an empty extension.
//   - aMp4 ("audio/mp4") and mp4 ("video/mp4") both use the "mp4" extension.
//   - quickTime and mqv both report "video/quicktime".
//   - shp and shx both report "application/octet-stream", the same MIME type
//     as root; they differ from it only by extension.
var (
	gzip = newNode("application/gzip", "gz", matchers.Gzip)
	sevenZ = newNode("application/x-7z-compressed", "7z", matchers.SevenZ)
	// zip is the parent of the zip-based document and archive formats.
	zip = newNode("application/zip", "zip", matchers.Zip,
		xlsx, docx, pptx, epub, jar, odt, ods, odp, odg, odf)
	tar = newNode("application/x-tar", "tar", matchers.Tar)
	xar = newNode("application/x-xar", "xar", matchers.Xar)
	bz2 = newNode("application/x-bzip2", "bz2", matchers.Bz2)
	pdf = newNode("application/pdf", "pdf", matchers.Pdf)
	xlsx = newNode("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx", matchers.Xlsx)
	docx = newNode("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx", matchers.Docx)
	pptx = newNode("application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx", matchers.Pptx)
	epub = newNode("application/epub+zip", "epub", matchers.Epub)
	jar = newNode("application/jar", "jar", matchers.Jar)
	doc = newNode("application/msword", "doc", matchers.Doc)
	ppt = newNode("application/vnd.ms-powerpoint", "ppt", matchers.Ppt)
	xls = newNode("application/vnd.ms-excel", "xls", matchers.Xls)
	ps = newNode("application/postscript", "ps", matchers.Ps)
	psd = newNode("application/x-photoshop", "psd", matchers.Psd)
	fits = newNode("application/fits", "fits", matchers.Fits)
	ogg = newNode("application/ogg", "ogg", matchers.Ogg)
	// txt is the parent of every text-based format; xml and json in turn
	// have their own, more specific children.
	txt = newNode("text/plain", "txt", matchers.Txt,
		html, svg, xml, php, js, lua, perl, python, json, rtf, tcl, csv, tsv, vCard)
	xml = newNode("text/xml; charset=utf-8", "xml", matchers.Xml,
		x3d, kml, collada, gml, gpx, tcx)
	json = newNode("application/json", "json", matchers.Json, geoJson)
	csv = newNode("text/csv", "csv", matchers.Csv)
	tsv = newNode("text/tab-separated-values", "tsv", matchers.Tsv)
	geoJson = newNode("application/geo+json", "geojson", matchers.GeoJson)
	html = newNode("text/html; charset=utf-8", "html", matchers.Html)
	php = newNode("text/x-php; charset=utf-8", "php", matchers.Php)
	rtf = newNode("text/rtf", "rtf", matchers.Rtf)
	js = newNode("application/javascript", "js", matchers.Js)
	lua = newNode("text/x-lua", "lua", matchers.Lua)
	perl = newNode("text/x-perl", "pl", matchers.Perl)
	python = newNode("application/x-python", "py", matchers.Python)
	tcl = newNode("text/x-tcl", "tcl", matchers.Tcl)
	vCard = newNode("text/vcard", "vcf", matchers.VCard)
	svg = newNode("image/svg+xml", "svg", matchers.Svg)
	x3d = newNode("model/x3d+xml", "x3d", matchers.X3d)
	kml = newNode("application/vnd.google-earth.kml+xml", "kml", matchers.Kml)
	collada = newNode("model/vnd.collada+xml", "dae", matchers.Collada)
	gml = newNode("application/gml+xml", "gml", matchers.Gml)
	gpx = newNode("application/gpx+xml", "gpx", matchers.Gpx)
	tcx = newNode("application/vnd.garmin.tcx+xml", "tcx", matchers.Tcx)
	png = newNode("image/png", "png", matchers.Png)
	jpg = newNode("image/jpeg", "jpg", matchers.Jpg)
	gif = newNode("image/gif", "gif", matchers.Gif)
	webp = newNode("image/webp", "webp", matchers.Webp)
	tiff = newNode("image/tiff", "tiff", matchers.Tiff)
	bmp = newNode("image/bmp", "bmp", matchers.Bmp)
	ico = newNode("image/x-icon", "ico", matchers.Ico)
	mp3 = newNode("audio/mpeg", "mp3", matchers.Mp3)
	flac = newNode("audio/flac", "flac", matchers.Flac)
	midi = newNode("audio/midi", "midi", matchers.Midi)
	ape = newNode("audio/ape", "ape", matchers.Ape)
	musePack = newNode("audio/musepack", "mpc", matchers.MusePack)
	wav = newNode("audio/wav", "wav", matchers.Wav)
	aiff = newNode("audio/aiff", "aiff", matchers.Aiff)
	au = newNode("audio/basic", "au", matchers.Au)
	amr = newNode("audio/amr", "amr", matchers.Amr)
	aMp4 = newNode("audio/mp4", "mp4", matchers.AMp4)
	m4a = newNode("audio/x-m4a", "m4a", matchers.M4a)
	mp4 = newNode("video/mp4", "mp4", matchers.Mp4)
	webM = newNode("video/webm", "webm", matchers.WebM)
	mpeg = newNode("video/mpeg", "mpeg", matchers.Mpeg)
	quickTime = newNode("video/quicktime", "mov", matchers.QuickTime)
	mqv = newNode("video/quicktime", "mqv", matchers.Mqv)
	threeGP = newNode("video/3gpp", "3gp", matchers.ThreeGP)
	threeG2 = newNode("video/3gpp2", "3g2", matchers.ThreeG2)
	avi = newNode("video/x-msvideo", "avi", matchers.Avi)
	flv = newNode("video/x-flv", "flv", matchers.Flv)
	mkv = newNode("video/x-matroska", "mkv", matchers.Mkv)
	asf = newNode("video/x-ms-asf", "asf", matchers.Asf)
	class = newNode("application/x-java-applet; charset=binary", "class", matchers.Class)
	swf = newNode("application/x-shockwave-flash", "swf", matchers.Swf)
	crx = newNode("application/x-chrome-extension", "crx", matchers.Crx)
	woff = newNode("font/woff", "woff", matchers.Woff)
	woff2 = newNode("font/woff2", "woff2", matchers.Woff2)
	wasm = newNode("application/wasm", "wasm", matchers.Wasm)
	// shp is only reachable as a child of shx; it is not listed under root.
	shp = newNode("application/octet-stream", "shp", matchers.Shp)
	shx = newNode("application/octet-stream", "shx", matchers.Shx, shp)
	dbf = newNode("application/x-dbf", "dbf", matchers.Dbf)
	exe = newNode("application/vnd.microsoft.portable-executable", "exe", matchers.Exe)
	// elf is the parent of the more specific ELF kinds declared right after it.
	elf = newNode("application/x-elf", "", matchers.Elf, elfObj, elfExe, elfLib, elfDump)
	elfObj = newNode("application/x-object", "", matchers.ElfObj)
	elfExe = newNode("application/x-executable", "", matchers.ElfExe)
	elfLib = newNode("application/x-sharedlib", "so", matchers.ElfLib)
	elfDump = newNode("application/x-coredump", "", matchers.ElfDump)
	ar = newNode("application/x-archive", "a", matchers.Ar, deb)
	deb = newNode("application/vnd.debian.binary-package", "deb", matchers.Deb)
	dcm = newNode("application/dicom", "dcm", matchers.Dcm)
	// OpenDocument formats (children of zip). Each document type that has a
	// template variant lists that template as its own child.
	odt = newNode("application/vnd.oasis.opendocument.text", "odt", matchers.Odt, ott)
	ott = newNode("application/vnd.oasis.opendocument.text-template", "ott", matchers.Ott)
	ods = newNode("application/vnd.oasis.opendocument.spreadsheet", "ods", matchers.Ods, ots)
	ots = newNode("application/vnd.oasis.opendocument.spreadsheet-template", "ots", matchers.Ots)
	odp = newNode("application/vnd.oasis.opendocument.presentation", "odp", matchers.Odp, otp)
	otp = newNode("application/vnd.oasis.opendocument.presentation-template", "otp", matchers.Otp)
	odg = newNode("application/vnd.oasis.opendocument.graphics", "odg", matchers.Odg, otg)
	otg = newNode("application/vnd.oasis.opendocument.graphics-template", "otg", matchers.Otg)
	odf = newNode("application/vnd.oasis.opendocument.formula", "odf", matchers.Odf)
)
+testdata/alice29.txt +testdata/asyoulik.txt +testdata/fireworks.jpeg +testdata/geo.protodata +testdata/html +testdata/html_x_4 +testdata/kppkn.gtb +testdata/lcet10.txt +testdata/paper-100k.pdf +testdata/plrabn12.txt +testdata/urls.10K diff --git a/vendor/github.com/golang/snappy/AUTHORS b/vendor/github.com/golang/snappy/AUTHORS new file mode 100644 index 000000000..bcfa19520 --- /dev/null +++ b/vendor/github.com/golang/snappy/AUTHORS @@ -0,0 +1,15 @@ +# This is the official list of Snappy-Go authors for copyright purposes. +# This file is distinct from the CONTRIBUTORS files. +# See the latter for an explanation. + +# Names should be added to this file as +# Name or Organization +# The email address is not required for organizations. + +# Please keep the list sorted. + +Damian Gryski +Google Inc. +Jan Mercl <0xjnml@gmail.com> +Rodolfo Carvalho +Sebastien Binet diff --git a/vendor/github.com/golang/snappy/CONTRIBUTORS b/vendor/github.com/golang/snappy/CONTRIBUTORS new file mode 100644 index 000000000..931ae3160 --- /dev/null +++ b/vendor/github.com/golang/snappy/CONTRIBUTORS @@ -0,0 +1,37 @@ +# This is the official list of people who can contribute +# (and typically have contributed) code to the Snappy-Go repository. +# The AUTHORS file lists the copyright holders; this file +# lists people. For example, Google employees are listed here +# but not in AUTHORS, because Google holds the copyright. +# +# The submission process automatically checks to make sure +# that people submitting code are listed in this file (by email address). +# +# Names should be added to this file only after verifying that +# the individual or the individual's organization has agreed to +# the appropriate Contributor License Agreement, found here: +# +# http://code.google.com/legal/individual-cla-v1.0.html +# http://code.google.com/legal/corporate-cla-v1.0.html +# +# The agreement for individuals can be filled out on the web. 
+# +# When adding J Random Contributor's name to this file, +# either J's name or J's organization's name should be +# added to the AUTHORS file, depending on whether the +# individual or corporate CLA was used. + +# Names should be added to this file like so: +# Name + +# Please keep the list sorted. + +Damian Gryski +Jan Mercl <0xjnml@gmail.com> +Kai Backman +Marc-Antoine Ruel +Nigel Tao +Rob Pike +Rodolfo Carvalho +Russ Cox +Sebastien Binet diff --git a/vendor/github.com/golang/snappy/LICENSE b/vendor/github.com/golang/snappy/LICENSE new file mode 100644 index 000000000..6050c10f4 --- /dev/null +++ b/vendor/github.com/golang/snappy/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/golang/snappy/README b/vendor/github.com/golang/snappy/README new file mode 100644 index 000000000..cea12879a --- /dev/null +++ b/vendor/github.com/golang/snappy/README @@ -0,0 +1,107 @@ +The Snappy compression format in the Go programming language. + +To download and install from source: +$ go get github.com/golang/snappy + +Unless otherwise noted, the Snappy-Go source files are distributed +under the BSD-style license found in the LICENSE file. + + + +Benchmarks. + +The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten +or so files, the same set used by the C++ Snappy code (github.com/google/snappy +and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @ +3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29: + +"go test -test.bench=." 
+ +_UFlat0-8 2.19GB/s ± 0% html +_UFlat1-8 1.41GB/s ± 0% urls +_UFlat2-8 23.5GB/s ± 2% jpg +_UFlat3-8 1.91GB/s ± 0% jpg_200 +_UFlat4-8 14.0GB/s ± 1% pdf +_UFlat5-8 1.97GB/s ± 0% html4 +_UFlat6-8 814MB/s ± 0% txt1 +_UFlat7-8 785MB/s ± 0% txt2 +_UFlat8-8 857MB/s ± 0% txt3 +_UFlat9-8 719MB/s ± 1% txt4 +_UFlat10-8 2.84GB/s ± 0% pb +_UFlat11-8 1.05GB/s ± 0% gaviota + +_ZFlat0-8 1.04GB/s ± 0% html +_ZFlat1-8 534MB/s ± 0% urls +_ZFlat2-8 15.7GB/s ± 1% jpg +_ZFlat3-8 740MB/s ± 3% jpg_200 +_ZFlat4-8 9.20GB/s ± 1% pdf +_ZFlat5-8 991MB/s ± 0% html4 +_ZFlat6-8 379MB/s ± 0% txt1 +_ZFlat7-8 352MB/s ± 0% txt2 +_ZFlat8-8 396MB/s ± 1% txt3 +_ZFlat9-8 327MB/s ± 1% txt4 +_ZFlat10-8 1.33GB/s ± 1% pb +_ZFlat11-8 605MB/s ± 1% gaviota + + + +"go test -test.bench=. -tags=noasm" + +_UFlat0-8 621MB/s ± 2% html +_UFlat1-8 494MB/s ± 1% urls +_UFlat2-8 23.2GB/s ± 1% jpg +_UFlat3-8 1.12GB/s ± 1% jpg_200 +_UFlat4-8 4.35GB/s ± 1% pdf +_UFlat5-8 609MB/s ± 0% html4 +_UFlat6-8 296MB/s ± 0% txt1 +_UFlat7-8 288MB/s ± 0% txt2 +_UFlat8-8 309MB/s ± 1% txt3 +_UFlat9-8 280MB/s ± 1% txt4 +_UFlat10-8 753MB/s ± 0% pb +_UFlat11-8 400MB/s ± 0% gaviota + +_ZFlat0-8 409MB/s ± 1% html +_ZFlat1-8 250MB/s ± 1% urls +_ZFlat2-8 12.3GB/s ± 1% jpg +_ZFlat3-8 132MB/s ± 0% jpg_200 +_ZFlat4-8 2.92GB/s ± 0% pdf +_ZFlat5-8 405MB/s ± 1% html4 +_ZFlat6-8 179MB/s ± 1% txt1 +_ZFlat7-8 170MB/s ± 1% txt2 +_ZFlat8-8 189MB/s ± 1% txt3 +_ZFlat9-8 164MB/s ± 1% txt4 +_ZFlat10-8 479MB/s ± 1% pb +_ZFlat11-8 270MB/s ± 1% gaviota + + + +For comparison (Go's encoded output is byte-for-byte identical to C++'s), here +are the numbers from C++ Snappy's + +make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log + +BM_UFlat/0 2.4GB/s html +BM_UFlat/1 1.4GB/s urls +BM_UFlat/2 21.8GB/s jpg +BM_UFlat/3 1.5GB/s jpg_200 +BM_UFlat/4 13.3GB/s pdf +BM_UFlat/5 2.1GB/s html4 +BM_UFlat/6 1.0GB/s txt1 +BM_UFlat/7 959.4MB/s txt2 +BM_UFlat/8 1.0GB/s txt3 +BM_UFlat/9 864.5MB/s txt4 +BM_UFlat/10 2.9GB/s pb +BM_UFlat/11 1.2GB/s 
gaviota + +BM_ZFlat/0 944.3MB/s html (22.31 %) +BM_ZFlat/1 501.6MB/s urls (47.78 %) +BM_ZFlat/2 14.3GB/s jpg (99.95 %) +BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %) +BM_ZFlat/4 8.3GB/s pdf (83.30 %) +BM_ZFlat/5 903.5MB/s html4 (22.52 %) +BM_ZFlat/6 336.0MB/s txt1 (57.88 %) +BM_ZFlat/7 312.3MB/s txt2 (61.91 %) +BM_ZFlat/8 353.1MB/s txt3 (54.99 %) +BM_ZFlat/9 289.9MB/s txt4 (66.26 %) +BM_ZFlat/10 1.2GB/s pb (19.68 %) +BM_ZFlat/11 527.4MB/s gaviota (37.72 %) diff --git a/vendor/github.com/golang/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go new file mode 100644 index 000000000..72efb0353 --- /dev/null +++ b/vendor/github.com/golang/snappy/decode.go @@ -0,0 +1,237 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" + "io" +) + +var ( + // ErrCorrupt reports that the input is invalid. + ErrCorrupt = errors.New("snappy: corrupt input") + // ErrTooLarge reports that the uncompressed length is too large. + ErrTooLarge = errors.New("snappy: decoded block is too large") + // ErrUnsupported reports that the input isn't supported. + ErrUnsupported = errors.New("snappy: unsupported input") + + errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") +) + +// DecodedLen returns the length of the decoded block. +func DecodedLen(src []byte) (int, error) { + v, _, err := decodedLen(src) + return v, err +} + +// decodedLen returns the length of the decoded block and the number of bytes +// that the length header occupied. 
+func decodedLen(src []byte) (blockLen, headerLen int, err error) { + v, n := binary.Uvarint(src) + if n <= 0 || v > 0xffffffff { + return 0, 0, ErrCorrupt + } + + const wordSize = 32 << (^uint(0) >> 32 & 1) + if wordSize == 32 && v > 0x7fffffff { + return 0, 0, ErrTooLarge + } + return int(v), n, nil +} + +const ( + decodeErrCodeCorrupt = 1 + decodeErrCodeUnsupportedLiteralLength = 2 +) + +// Decode returns the decoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire decoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +func Decode(dst, src []byte) ([]byte, error) { + dLen, s, err := decodedLen(src) + if err != nil { + return nil, err + } + if dLen <= len(dst) { + dst = dst[:dLen] + } else { + dst = make([]byte, dLen) + } + switch decode(dst, src[s:]) { + case 0: + return dst, nil + case decodeErrCodeUnsupportedLiteralLength: + return nil, errUnsupportedLiteralLength + } + return nil, ErrCorrupt +} + +// NewReader returns a new Reader that decompresses from r, using the framing +// format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +func NewReader(r io.Reader) *Reader { + return &Reader{ + r: r, + decoded: make([]byte, maxBlockSize), + buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize), + } +} + +// Reader is an io.Reader that can read Snappy-compressed bytes. +type Reader struct { + r io.Reader + err error + decoded []byte + buf []byte + // decoded[i:j] contains decoded bytes that have not yet been passed on. + i, j int + readHeader bool +} + +// Reset discards any buffered data, resets all state, and switches the Snappy +// reader to read from r. This permits reusing a Reader rather than allocating +// a new one. 
+func (r *Reader) Reset(reader io.Reader) { + r.r = reader + r.err = nil + r.i = 0 + r.j = 0 + r.readHeader = false +} + +func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { + if _, r.err = io.ReadFull(r.r, p); r.err != nil { + if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { + r.err = ErrCorrupt + } + return false + } + return true +} + +// Read satisfies the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + if r.err != nil { + return 0, r.err + } + for { + if r.i < r.j { + n := copy(p, r.decoded[r.i:r.j]) + r.i += n + return n, nil + } + if !r.readFull(r.buf[:4], true) { + return 0, r.err + } + chunkType := r.buf[0] + if !r.readHeader { + if chunkType != chunkTypeStreamIdentifier { + r.err = ErrCorrupt + return 0, r.err + } + r.readHeader = true + } + chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 + if chunkLen > len(r.buf) { + r.err = ErrUnsupported + return 0, r.err + } + + // The chunk types are specified at + // https://github.com/google/snappy/blob/master/framing_format.txt + switch chunkType { + case chunkTypeCompressedData: + // Section 4.2. Compressed data (chunk type 0x00). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + buf := r.buf[:chunkLen] + if !r.readFull(buf, false) { + return 0, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + buf = buf[checksumSize:] + + n, err := DecodedLen(buf) + if err != nil { + r.err = err + return 0, r.err + } + if n > len(r.decoded) { + r.err = ErrCorrupt + return 0, r.err + } + if _, err := Decode(r.decoded, buf); err != nil { + r.err = err + return 0, r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return 0, r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeUncompressedData: + // Section 4.3. Uncompressed data (chunk type 0x01). 
+ if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + buf := r.buf[:checksumSize] + if !r.readFull(buf, false) { + return 0, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + // Read directly into r.decoded instead of via r.buf. + n := chunkLen - checksumSize + if n > len(r.decoded) { + r.err = ErrCorrupt + return 0, r.err + } + if !r.readFull(r.decoded[:n], false) { + return 0, r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return 0, r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeStreamIdentifier: + // Section 4.1. Stream identifier (chunk type 0xff). + if chunkLen != len(magicBody) { + r.err = ErrCorrupt + return 0, r.err + } + if !r.readFull(r.buf[:len(magicBody)], false) { + return 0, r.err + } + for i := 0; i < len(magicBody); i++ { + if r.buf[i] != magicBody[i] { + r.err = ErrCorrupt + return 0, r.err + } + } + continue + } + + if chunkType <= 0x7f { + // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). + r.err = ErrUnsupported + return 0, r.err + } + // Section 4.4 Padding (chunk type 0xfe). + // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). + if !r.readFull(r.buf[:chunkLen], false) { + return 0, r.err + } + } +} diff --git a/vendor/github.com/golang/snappy/decode_amd64.go b/vendor/github.com/golang/snappy/decode_amd64.go new file mode 100644 index 000000000..fcd192b84 --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_amd64.go @@ -0,0 +1,14 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +package snappy + +// decode has the same semantics as in decode_other.go. 
// func decode(dst, src []byte) int
//
// Decodes the Snappy block body in src into dst and stores a status code in
// the return slot: 0 on success, 1 (decodeErrCodeCorrupt in decode.go) on any
// failure. This implementation never stores 2
// (decodeErrCodeUnsupportedLiteralLength).
//
// Frame layout implied by "$48-56" and the offsets used below:
//	- 0(SP)..16(SP)   the three arguments pushed for runtime·memmove
//	- 24(SP)..40(SP)  spill slots for DI, SI and CX across that CALL
//	- dst slice header at 0(FP), src slice header at 24(FP), result at 48(FP)
//
// All local variables fit into registers. The non-zero stack size is only to
// spill registers and push args when issuing a CALL. The register allocation:
//	- AX	scratch
//	- BX	scratch
//	- CX	length or x
//	- DX	offset
//	- SI	&src[s]
//	- DI	&dst[d]
//	+ R8	dst_base
//	+ R9	dst_len
//	+ R10	dst_base + dst_len
//	+ R11	src_base
//	+ R12	src_len
//	+ R13	src_base + src_len
//	- R14	used by doCopy
//	- R15	used by doCopy
//
// The registers R8-R13 (marked with a "+") are set at the start of the
// function, and after a CALL returns, and are not otherwise modified.
//
// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI.
// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI.
TEXT ·decode(SB), NOSPLIT, $48-56
	// Initialize SI, DI and R8-R13.
	MOVQ dst_base+0(FP), R8
	MOVQ dst_len+8(FP), R9
	MOVQ R8, DI
	MOVQ R8, R10
	ADDQ R9, R10
	MOVQ src_base+24(FP), R11
	MOVQ src_len+32(FP), R12
	MOVQ R11, SI
	MOVQ R11, R13
	ADDQ R12, R13

loop:
	// for s < len(src)
	CMPQ SI, R13
	JEQ end

	// CX = uint32(src[s])
	//
	// switch src[s] & 0x03
	//
	// A tag of 0 is a literal and falls through; tags 1-3 are copies.
	MOVBLZX (SI), CX
	MOVL CX, BX
	ANDL $3, BX
	CMPL BX, $1
	JAE tagCopy

	// ----------------------------------------
	// The code below handles literal tags.

	// case tagLiteral:
	// x := uint32(src[s] >> 2)
	// switch
	SHRL $2, CX
	CMPL CX, $60
	JAE tagLit60Plus

	// case x < 60:
	// s++
	INCQ SI

doLit:
	// This is the end of the inner "switch", when we have a literal tag.
	//
	// We assume that CX == x and x fits in a uint32, where x is the variable
	// used in the pure Go decode_other.go code.

	// length = int(x) + 1
	//
	// Unlike the pure Go code, we don't need to check if length <= 0 because
	// CX can hold 64 bits, so the increment cannot overflow.
	INCQ CX

	// Prepare to check if copying length bytes will run past the end of dst or
	// src.
	//
	// AX = len(dst) - d
	// BX = len(src) - s
	MOVQ R10, AX
	SUBQ DI, AX
	MOVQ R13, BX
	SUBQ SI, BX

	// !!! Try a faster technique for short (16 or fewer bytes) copies.
	//
	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
	//   goto callMemmove // Fall back on calling runtime·memmove.
	// }
	//
	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
	// against 21 instead of 16, because it cannot assume that all of its input
	// is contiguous in memory and so it needs to leave enough source bytes to
	// read the next tag without refilling buffers, but Go's Decode assumes
	// contiguousness (the src argument is a []byte).
	CMPQ CX, $16
	JGT callMemmove
	CMPQ AX, $16
	JLT callMemmove
	CMPQ BX, $16
	JLT callMemmove

	// !!! Implement the copy from src to dst as a 16-byte load and store.
	// (Decode's documentation says that dst and src must not overlap.)
	//
	// This always copies 16 bytes, instead of only length bytes, but that's
	// OK. If the input is a valid Snappy encoding then subsequent iterations
	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
	// non-nil error), so the overrun will be ignored.
	//
	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
	// 16-byte loads and stores. This technique probably wouldn't be as
	// effective on architectures that are fussier about alignment.
	MOVOU 0(SI), X0
	MOVOU X0, 0(DI)

	// d += length
	// s += length
	ADDQ CX, DI
	ADDQ CX, SI
	JMP loop

callMemmove:
	// if length > len(dst)-d || length > len(src)-s { etc }
	CMPQ CX, AX
	JGT errCorrupt
	CMPQ CX, BX
	JGT errCorrupt

	// copy(dst[d:], src[s:s+length])
	//
	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
	// DI, SI and CX as arguments. Coincidentally, we also need to spill those
	// three registers to the stack, to save local variables across the CALL.
	//
	// The first three stores are the call arguments; the next three are the
	// spill copies that are reloaded after the CALL.
	MOVQ DI, 0(SP)
	MOVQ SI, 8(SP)
	MOVQ CX, 16(SP)
	MOVQ DI, 24(SP)
	MOVQ SI, 32(SP)
	MOVQ CX, 40(SP)
	CALL runtime·memmove(SB)

	// Restore local variables: unspill registers from the stack and
	// re-calculate R8-R13.
	MOVQ 24(SP), DI
	MOVQ 32(SP), SI
	MOVQ 40(SP), CX
	MOVQ dst_base+0(FP), R8
	MOVQ dst_len+8(FP), R9
	MOVQ R8, R10
	ADDQ R9, R10
	MOVQ src_base+24(FP), R11
	MOVQ src_len+32(FP), R12
	MOVQ R11, R13
	ADDQ R12, R13

	// d += length
	// s += length
	ADDQ CX, DI
	ADDQ CX, SI
	JMP loop

tagLit60Plus:
	// !!! This fragment does the
	//
	// s += x - 58; if uint(s) > uint(len(src)) { etc }
	//
	// checks. In the asm version, we code it once instead of once per switch case.
	//
	// x is 60..63 here, so x - 58 is 2..5: the tag byte plus the 1..4 bytes
	// that hold the literal length.
	ADDQ CX, SI
	SUBQ $58, SI
	MOVQ SI, BX
	SUBQ R11, BX
	CMPQ BX, R12
	JA errCorrupt

	// case x == 60:
	CMPL CX, $61
	JEQ tagLit61
	JA tagLit62Plus

	// x = uint32(src[s-1])
	MOVBLZX -1(SI), CX
	JMP doLit

tagLit61:
	// case x == 61:
	// x = uint32(src[s-2]) | uint32(src[s-1])<<8
	MOVWLZX -2(SI), CX
	JMP doLit

tagLit62Plus:
	CMPL CX, $62
	JA tagLit63

	// case x == 62:
	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
	MOVWLZX -3(SI), CX
	MOVBLZX -1(SI), BX
	SHLL $16, BX
	ORL BX, CX
	JMP doLit

tagLit63:
	// case x == 63:
	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
	MOVL -4(SI), CX
	JMP doLit

// The code above handles literal tags.
// ----------------------------------------
// The code below handles copy tags.

tagCopy4:
	// case tagCopy4:
	// s += 5
	ADDQ $5, SI

	// if uint(s) > uint(len(src)) { etc }
	MOVQ SI, BX
	SUBQ R11, BX
	CMPQ BX, R12
	JA errCorrupt

	// length = 1 + int(src[s-5])>>2
	SHRQ $2, CX
	INCQ CX

	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
	MOVLQZX -4(SI), DX
	JMP doCopy

tagCopy2:
	// case tagCopy2:
	// s += 3
	ADDQ $3, SI

	// if uint(s) > uint(len(src)) { etc }
	MOVQ SI, BX
	SUBQ R11, BX
	CMPQ BX, R12
	JA errCorrupt

	// length = 1 + int(src[s-3])>>2
	SHRQ $2, CX
	INCQ CX

	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
	MOVWQZX -2(SI), DX
	JMP doCopy

tagCopy:
	// We have a copy tag. We assume that:
	//	- BX == src[s] & 0x03
	//	- CX == src[s]
	CMPQ BX, $2
	JEQ tagCopy2
	JA tagCopy4

	// case tagCopy1:
	// s += 2
	ADDQ $2, SI

	// if uint(s) > uint(len(src)) { etc }
	MOVQ SI, BX
	SUBQ R11, BX
	CMPQ BX, R12
	JA errCorrupt

	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
	MOVQ CX, DX
	ANDQ $0xe0, DX
	SHLQ $3, DX
	MOVBQZX -1(SI), BX
	ORQ BX, DX

	// length = 4 + int(src[s-2])>>2&0x7
	SHRQ $2, CX
	ANDQ $7, CX
	ADDQ $4, CX

doCopy:
	// This is the end of the outer "switch", when we have a copy tag.
	//
	// We assume that:
	//	- CX == length && CX > 0
	//	- DX == offset

	// if offset <= 0 { etc }
	CMPQ DX, $0
	JLE errCorrupt

	// if d < offset { etc }
	MOVQ DI, BX
	SUBQ R8, BX
	CMPQ BX, DX
	JLT errCorrupt

	// if length > len(dst)-d { etc }
	MOVQ R10, BX
	SUBQ DI, BX
	CMPQ CX, BX
	JGT errCorrupt

	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
	//
	// Set:
	//	- R14 = len(dst)-d
	//	- R15 = &dst[d-offset]
	MOVQ R10, R14
	SUBQ DI, R14
	MOVQ DI, R15
	SUBQ DX, R15

	// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
	//
	// First, try using two 8-byte load/stores, similar to the doLit technique
	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
	// and not one 16-byte load/store, and the first store has to be before the
	// second load, due to the overlap if offset is in the range [8, 16).
	//
	// if length > 16 || offset < 8 || len(dst)-d < 16 {
	//   goto slowForwardCopy
	// }
	// copy 16 bytes
	// d += length
	CMPQ CX, $16
	JGT slowForwardCopy
	CMPQ DX, $8
	JLT slowForwardCopy
	CMPQ R14, $16
	JLT slowForwardCopy
	MOVQ 0(R15), AX
	MOVQ AX, 0(DI)
	MOVQ 8(R15), BX
	MOVQ BX, 8(DI)
	ADDQ CX, DI
	JMP loop

slowForwardCopy:
	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
	// can still try 8-byte load stores, provided we can overrun up to 10 extra
	// bytes. As above, the overrun will be fixed up by subsequent iterations
	// of the outermost loop.
	//
	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
	// commentary says:
	//
	// ----
	//
	// The main part of this loop is a simple copy of eight bytes at a time
	// until we've copied (at least) the requested amount of bytes. However,
	// if d and d-offset are less than eight bytes apart (indicating a
	// repeating pattern of length < 8), we first need to expand the pattern in
	// order to get the correct results. For instance, if the buffer looks like
	// this, with the eight-byte d-offset and d patterns marked as
	// intervals:
	//
	//    abxxxxxxxxxxxx
	//    [------]           d-offset
	//      [------]         d
	//
	// a single eight-byte copy from d-offset to d will repeat the pattern
	// once, after which we can move d two bytes without moving d-offset:
	//
	//    ababxxxxxxxxxx
	//    [------]           d-offset
	//        [------]       d
	//
	// and repeat the exercise until the two no longer overlap.
	//
	// This allows us to do very well in the special case of one single byte
	// repeated many times, without taking a big hit for more general cases.
	//
	// The worst case of extra writing past the end of the match occurs when
	// offset == 1 and length == 1; the last copy will read from byte positions
	// [0..7] and write to [4..11], whereas it was only supposed to write to
	// position 1. Thus, ten excess bytes.
	//
	// ----
	//
	// That "10 byte overrun" worst case is confirmed by Go's
	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
	// and finishSlowForwardCopy algorithm.
	//
	// if length > len(dst)-d-10 {
	//   goto verySlowForwardCopy
	// }
	SUBQ $10, R14
	CMPQ CX, R14
	JGT verySlowForwardCopy

makeOffsetAtLeast8:
	// !!! As above, expand the pattern so that offset >= 8 and we can use
	// 8-byte load/stores.
	//
	// for offset < 8 {
	//   copy 8 bytes from dst[d-offset:] to dst[d:]
	//   length -= offset
	//   d      += offset
	//   offset += offset
	//   // The two previous lines together means that d-offset, and therefore
	//   // R15, is unchanged.
	// }
	CMPQ DX, $8
	JGE fixUpSlowForwardCopy
	MOVQ (R15), BX
	MOVQ BX, (DI)
	SUBQ DX, CX
	ADDQ DX, DI
	ADDQ DX, DX
	JMP makeOffsetAtLeast8

fixUpSlowForwardCopy:
	// !!! Add length (which might be negative now) to d (implied by DI being
	// &dst[d]) so that d ends up at the right place when we jump back to the
	// top of the loop. Before we do that, though, we save DI to AX so that, if
	// length is positive, copying the remaining length bytes will write to the
	// right place.
	MOVQ DI, AX
	ADDQ CX, DI

finishSlowForwardCopy:
	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
	// length means that we overrun, but as above, that will be fixed up by
	// subsequent iterations of the outermost loop.
	CMPQ CX, $0
	JLE loop
	MOVQ (R15), BX
	MOVQ BX, (AX)
	ADDQ $8, R15
	ADDQ $8, AX
	SUBQ $8, CX
	JMP finishSlowForwardCopy

verySlowForwardCopy:
	// verySlowForwardCopy is a simple implementation of forward copy. In C
	// parlance, this is a do/while loop instead of a while loop, since we know
	// that length > 0. In Go syntax:
	//
	// for {
	//   dst[d] = dst[d - offset]
	//   d++
	//   length--
	//   if length == 0 {
	//     break
	//   }
	// }
	MOVB (R15), BX
	MOVB BX, (DI)
	INCQ R15
	INCQ DI
	DECQ CX
	JNZ verySlowForwardCopy
	JMP loop

// The code above handles copy tags.
// ----------------------------------------

end:
	// This is the end of the "for s < len(src)".
	//
	// if d != len(dst) { etc }
	CMPQ DI, R10
	JNE errCorrupt

	// return 0
	MOVQ $0, ret+48(FP)
	RET

errCorrupt:
	// return decodeErrCodeCorrupt
	//
	// Every failure in this file funnels through here, so 1 is the only
	// non-zero status this implementation reports.
	MOVQ $1, ret+48(FP)
	RET
+ // + // if d != len(dst) { etc } + CMPQ DI, R10 + JNE errCorrupt + + // return 0 + MOVQ $0, ret+48(FP) + RET + +errCorrupt: + // return decodeErrCodeCorrupt + MOVQ $1, ret+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/decode_other.go b/vendor/github.com/golang/snappy/decode_other.go new file mode 100644 index 000000000..8c9f2049b --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_other.go @@ -0,0 +1,101 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine !gc noasm + +package snappy + +// decode writes the decoding of src to dst. It assumes that the varint-encoded +// length of the decompressed bytes has already been read, and that len(dst) +// equals that length. +// +// It returns 0 on success or a decodeErrCodeXxx error code on failure. +func decode(dst, src []byte) int { + var d, s, offset, length int + for s < len(src) { + switch src[s] & 0x03 { + case tagLiteral: + x := uint32(src[s] >> 2) + switch { + case x < 60: + s++ + case x == 60: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-1]) + case x == 61: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-2]) | uint32(src[s-1])<<8 + case x == 62: + s += 4 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + case x == 63: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. 
+ return decodeErrCodeCorrupt + } + x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + } + length = int(x) + 1 + if length <= 0 { + return decodeErrCodeUnsupportedLiteralLength + } + if length > len(dst)-d || length > len(src)-s { + return decodeErrCodeCorrupt + } + copy(dst[d:], src[s:s+length]) + d += length + s += length + continue + + case tagCopy1: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 4 + int(src[s-2])>>2&0x7 + offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + + case tagCopy2: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-3])>>2 + offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + + case tagCopy4: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-5])>>2 + offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + } + + if offset <= 0 || d < offset || length > len(dst)-d { + return decodeErrCodeCorrupt + } + // Copy from an earlier sub-slice of dst to a later sub-slice. Unlike + // the built-in copy function, this byte-by-byte copy always runs + // forwards, even if the slices overlap. Conceptually, this is: + // + // d += forwardCopy(dst[d:d+length], dst[d-offset:]) + for end := d + length; d != end; d++ { + dst[d] = dst[d-offset] + } + } + if d != len(dst) { + return decodeErrCodeCorrupt + } + return 0 +} diff --git a/vendor/github.com/golang/snappy/encode.go b/vendor/github.com/golang/snappy/encode.go new file mode 100644 index 000000000..8d393e904 --- /dev/null +++ b/vendor/github.com/golang/snappy/encode.go @@ -0,0 +1,285 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" + "io" +) + +// Encode returns the encoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire encoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +func Encode(dst, src []byte) []byte { + if n := MaxEncodedLen(len(src)); n < 0 { + panic(ErrTooLarge) + } else if len(dst) < n { + dst = make([]byte, n) + } + + // The block starts with the varint-encoded length of the decompressed bytes. + d := binary.PutUvarint(dst, uint64(len(src))) + + for len(src) > 0 { + p := src + src = nil + if len(p) > maxBlockSize { + p, src = p[:maxBlockSize], p[maxBlockSize:] + } + if len(p) < minNonLiteralBlockSize { + d += emitLiteral(dst[d:], p) + } else { + d += encodeBlock(dst[d:], p) + } + } + return dst[:d] +} + +// inputMargin is the minimum number of extra input bytes to keep, inside +// encodeBlock's inner loop. On some architectures, this margin lets us +// implement a fast path for emitLiteral, where the copy of short (<= 16 byte) +// literals can be implemented as a single load to and store from a 16-byte +// register. That literal's actual length can be as short as 1 byte, so this +// can copy up to 15 bytes too much, but that's OK as subsequent iterations of +// the encoding loop will fix up the copy overrun, and this inputMargin ensures +// that we don't overrun the dst and src buffers. +const inputMargin = 16 - 1 + +// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that +// could be encoded with a copy tag. This is the minimum with respect to the +// algorithm used by encodeBlock, not a minimum enforced by the file format. +// +// The encoded output must start with at least a 1 byte literal, as there are +// no previous bytes to copy. 
A minimal (1 byte) copy after that, generated +// from an emitCopy call in encodeBlock's main loop, would require at least +// another inputMargin bytes, for the reason above: we want any emitLiteral +// calls inside encodeBlock's main loop to use the fast path if possible, which +// requires being able to overrun by inputMargin bytes. Thus, +// minNonLiteralBlockSize equals 1 + 1 + inputMargin. +// +// The C++ code doesn't use this exact threshold, but it could, as discussed at +// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion +// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an +// optimization. It should not affect the encoded form. This is tested by +// TestSameEncodingAsCppShortCopies. +const minNonLiteralBlockSize = 1 + 1 + inputMargin + +// MaxEncodedLen returns the maximum length of a snappy block, given its +// uncompressed length. +// +// It will return a negative value if srcLen is too large to encode. +func MaxEncodedLen(srcLen int) int { + n := uint64(srcLen) + if n > 0xffffffff { + return -1 + } + // Compressed data can be defined as: + // compressed := item* literal* + // item := literal* copy + // + // The trailing literal sequence has a space blowup of at most 62/60 + // since a literal of length 60 needs one tag byte + one extra byte + // for length information. + // + // Item blowup is trickier to measure. Suppose the "copy" op copies + // 4 bytes of data. Because of a special check in the encoding code, + // we produce a 4-byte copy only if the offset is < 65536. Therefore + // the copy op takes 3 bytes to encode, and this type of item leads + // to at most the 62/60 blowup for representing literals. + // + // Suppose the "copy" op copies 5 bytes of data. If the offset is big + // enough, it will take 5 bytes to encode the copy op. Therefore the + // worst case here is a one-byte literal followed by a five-byte copy. + // That is, 6 bytes of input turn into 7 bytes of "compressed" data. 
+ // + // This last factor dominates the blowup, so the final estimate is: + n = 32 + n + n/6 + if n > 0xffffffff { + return -1 + } + return int(n) +} + +var errClosed = errors.New("snappy: Writer is closed") + +// NewWriter returns a new Writer that compresses to w. +// +// The Writer returned does not buffer writes. There is no need to Flush or +// Close such a Writer. +// +// Deprecated: the Writer returned is not suitable for many small writes, only +// for few large writes. Use NewBufferedWriter instead, which is efficient +// regardless of the frequency and shape of the writes, and remember to Close +// that Writer when done. +func NewWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + obuf: make([]byte, obufLen), + } +} + +// NewBufferedWriter returns a new Writer that compresses to w, using the +// framing format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +// +// The Writer returned buffers writes. Users must call Close to guarantee all +// data has been forwarded to the underlying io.Writer. They may also call +// Flush zero or more times before calling Close. +func NewBufferedWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + ibuf: make([]byte, 0, maxBlockSize), + obuf: make([]byte, obufLen), + } +} + +// Writer is an io.Writer that can write Snappy-compressed bytes. +type Writer struct { + w io.Writer + err error + + // ibuf is a buffer for the incoming (uncompressed) bytes. + // + // Its use is optional. For backwards compatibility, Writers created by the + // NewWriter function have ibuf == nil, do not buffer incoming bytes, and + // therefore do not need to be Flush'ed or Close'd. + ibuf []byte + + // obuf is a buffer for the outgoing (compressed) bytes. + obuf []byte + + // wroteStreamHeader is whether we have written the stream header. + wroteStreamHeader bool +} + +// Reset discards the writer's state and switches the Snappy writer to write to +// w. 
This permits reusing a Writer rather than allocating a new one. +func (w *Writer) Reset(writer io.Writer) { + w.w = writer + w.err = nil + if w.ibuf != nil { + w.ibuf = w.ibuf[:0] + } + w.wroteStreamHeader = false +} + +// Write satisfies the io.Writer interface. +func (w *Writer) Write(p []byte) (nRet int, errRet error) { + if w.ibuf == nil { + // Do not buffer incoming bytes. This does not perform or compress well + // if the caller of Writer.Write writes many small slices. This + // behavior is therefore deprecated, but still supported for backwards + // compatibility with code that doesn't explicitly Flush or Close. + return w.write(p) + } + + // The remainder of this method is based on bufio.Writer.Write from the + // standard library. + + for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil { + var n int + if len(w.ibuf) == 0 { + // Large write, empty buffer. + // Write directly from p to avoid copy. + n, _ = w.write(p) + } else { + n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + w.Flush() + } + nRet += n + p = p[n:] + } + if w.err != nil { + return nRet, w.err + } + n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + nRet += n + return nRet, nil +} + +func (w *Writer) write(p []byte) (nRet int, errRet error) { + if w.err != nil { + return 0, w.err + } + for len(p) > 0 { + obufStart := len(magicChunk) + if !w.wroteStreamHeader { + w.wroteStreamHeader = true + copy(w.obuf, magicChunk) + obufStart = 0 + } + + var uncompressed []byte + if len(p) > maxBlockSize { + uncompressed, p = p[:maxBlockSize], p[maxBlockSize:] + } else { + uncompressed, p = p, nil + } + checksum := crc(uncompressed) + + // Compress the buffer, discarding the result if the improvement + // isn't at least 12.5%. 
+ compressed := Encode(w.obuf[obufHeaderLen:], uncompressed) + chunkType := uint8(chunkTypeCompressedData) + chunkLen := 4 + len(compressed) + obufEnd := obufHeaderLen + len(compressed) + if len(compressed) >= len(uncompressed)-len(uncompressed)/8 { + chunkType = chunkTypeUncompressedData + chunkLen = 4 + len(uncompressed) + obufEnd = obufHeaderLen + } + + // Fill in the per-chunk header that comes before the body. + w.obuf[len(magicChunk)+0] = chunkType + w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0) + w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8) + w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16) + w.obuf[len(magicChunk)+4] = uint8(checksum >> 0) + w.obuf[len(magicChunk)+5] = uint8(checksum >> 8) + w.obuf[len(magicChunk)+6] = uint8(checksum >> 16) + w.obuf[len(magicChunk)+7] = uint8(checksum >> 24) + + if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil { + w.err = err + return nRet, err + } + if chunkType == chunkTypeUncompressedData { + if _, err := w.w.Write(uncompressed); err != nil { + w.err = err + return nRet, err + } + } + nRet += len(uncompressed) + } + return nRet, nil +} + +// Flush flushes the Writer to its underlying io.Writer. +func (w *Writer) Flush() error { + if w.err != nil { + return w.err + } + if len(w.ibuf) == 0 { + return nil + } + w.write(w.ibuf) + w.ibuf = w.ibuf[:0] + return w.err +} + +// Close calls Flush and then closes the Writer. +func (w *Writer) Close() error { + w.Flush() + ret := w.err + if w.err == nil { + w.err = errClosed + } + return ret +} diff --git a/vendor/github.com/golang/snappy/encode_amd64.go b/vendor/github.com/golang/snappy/encode_amd64.go new file mode 100644 index 000000000..150d91bc8 --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_amd64.go @@ -0,0 +1,29 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build !appengine +// +build gc +// +build !noasm + +package snappy + +// emitLiteral has the same semantics as in encode_other.go. +// +//go:noescape +func emitLiteral(dst, lit []byte) int + +// emitCopy has the same semantics as in encode_other.go. +// +//go:noescape +func emitCopy(dst []byte, offset, length int) int + +// extendMatch has the same semantics as in encode_other.go. +// +//go:noescape +func extendMatch(src []byte, i, j int) int + +// encodeBlock has the same semantics as in encode_other.go. +// +//go:noescape +func encodeBlock(dst, src []byte) (d int) diff --git a/vendor/github.com/golang/snappy/encode_amd64.s b/vendor/github.com/golang/snappy/encode_amd64.s new file mode 100644 index 000000000..adfd979fe --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_amd64.s @@ -0,0 +1,730 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a +// Go toolchain regression. See https://github.com/golang/go/issues/15426 and +// https://github.com/golang/snappy/issues/29 +// +// As a workaround, the package was built with a known good assembler, and +// those instructions were disassembled by "objdump -d" to yield the +// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 +// style comments, in AT&T asm syntax. Note that rsp here is a physical +// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm). +// The instructions were then encoded as "BYTE $0x.." sequences, which assemble +// fine on Go 1.6. + +// The asm code generally follows the pure Go code in encode_other.go, except +// where marked with a "!!!". 
+ +// ---------------------------------------------------------------------------- + +// func emitLiteral(dst, lit []byte) int +// +// All local variables fit into registers. The register allocation: +// - AX len(lit) +// - BX n +// - DX return value +// - DI &dst[i] +// - R10 &lit[0] +// +// The 24 bytes of stack space is to call runtime·memmove. +// +// The unusual register allocation of local variables, such as R10 for the +// source pointer, matches the allocation used at the call site in encodeBlock, +// which makes it easier to manually inline this function. +TEXT ·emitLiteral(SB), NOSPLIT, $24-56 + MOVQ dst_base+0(FP), DI + MOVQ lit_base+24(FP), R10 + MOVQ lit_len+32(FP), AX + MOVQ AX, DX + MOVL AX, BX + SUBL $1, BX + + CMPL BX, $60 + JLT oneByte + CMPL BX, $256 + JLT twoBytes + +threeBytes: + MOVB $0xf4, 0(DI) + MOVW BX, 1(DI) + ADDQ $3, DI + ADDQ $3, DX + JMP memmove + +twoBytes: + MOVB $0xf0, 0(DI) + MOVB BX, 1(DI) + ADDQ $2, DI + ADDQ $2, DX + JMP memmove + +oneByte: + SHLB $2, BX + MOVB BX, 0(DI) + ADDQ $1, DI + ADDQ $1, DX + +memmove: + MOVQ DX, ret+48(FP) + + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // DI, R10 and AX as arguments. + MOVQ DI, 0(SP) + MOVQ R10, 8(SP) + MOVQ AX, 16(SP) + CALL runtime·memmove(SB) + RET + +// ---------------------------------------------------------------------------- + +// func emitCopy(dst []byte, offset, length int) int +// +// All local variables fit into registers. The register allocation: +// - AX length +// - SI &dst[0] +// - DI &dst[i] +// - R11 offset +// +// The unusual register allocation of local variables, such as R11 for the +// offset, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. 
+TEXT ·emitCopy(SB), NOSPLIT, $0-48 + MOVQ dst_base+0(FP), DI + MOVQ DI, SI + MOVQ offset+24(FP), R11 + MOVQ length+32(FP), AX + +loop0: + // for length >= 68 { etc } + CMPL AX, $68 + JLT step1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVB $0xfe, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $64, AX + JMP loop0 + +step1: + // if length > 64 { etc } + CMPL AX, $64 + JLE step2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVB $0xee, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $60, AX + +step2: + // if length >= 12 || offset >= 2048 { goto step3 } + CMPL AX, $12 + JGE step3 + CMPL R11, $2048 + JGE step3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(DI) + SHRL $8, R11 + SHLB $5, R11 + SUBB $4, AX + SHLB $2, AX + ORB AX, R11 + ORB $1, R11 + MOVB R11, 0(DI) + ADDQ $2, DI + + // Return the number of bytes written. + SUBQ SI, DI + MOVQ DI, ret+40(FP) + RET + +step3: + // Emit the remaining copy, encoded as 3 bytes. + SUBL $1, AX + SHLB $2, AX + ORB $2, AX + MOVB AX, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + + // Return the number of bytes written. + SUBQ SI, DI + MOVQ DI, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func extendMatch(src []byte, i, j int) int +// +// All local variables fit into registers. The register allocation: +// - DX &src[0] +// - SI &src[j] +// - R13 &src[len(src) - 8] +// - R14 &src[len(src)] +// - R15 &src[i] +// +// The unusual register allocation of local variables, such as R15 for a source +// pointer, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·extendMatch(SB), NOSPLIT, $0-48 + MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), R14 + MOVQ i+24(FP), R15 + MOVQ j+32(FP), SI + ADDQ DX, R14 + ADDQ DX, R15 + ADDQ DX, SI + MOVQ R14, R13 + SUBQ $8, R13 + +cmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. 
If those 8 bytes are equal, repeat. + CMPQ SI, R13 + JA cmp1 + MOVQ (R15), AX + MOVQ (SI), BX + CMPQ AX, BX + JNE bsf + ADDQ $8, R15 + ADDQ $8, SI + JMP cmp8 + +bsf: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. The BSF instruction finds the + // least significant 1 bit, the amd64 architecture is little-endian, and + // the shift by 3 converts a bit index to a byte index. + XORQ AX, BX + BSFQ BX, BX + SHRQ $3, BX + ADDQ BX, SI + + // Convert from &src[ret] to ret. + SUBQ DX, SI + MOVQ SI, ret+40(FP) + RET + +cmp1: + // In src's tail, compare 1 byte at a time. + CMPQ SI, R14 + JAE extendMatchEnd + MOVB (R15), AX + MOVB (SI), BX + CMPB AX, BX + JNE extendMatchEnd + ADDQ $1, R15 + ADDQ $1, SI + JMP cmp1 + +extendMatchEnd: + // Convert from &src[ret] to ret. + SUBQ DX, SI + MOVQ SI, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func encodeBlock(dst, src []byte) (d int) +// +// All local variables fit into registers, other than "var table". The register +// allocation: +// - AX . . +// - BX . . +// - CX 56 shift (note that amd64 shifts by non-immediates must use CX). +// - DX 64 &src[0], tableSize +// - SI 72 &src[s] +// - DI 80 &dst[d] +// - R9 88 sLimit +// - R10 . &src[nextEmit] +// - R11 96 prevHash, currHash, nextHash, offset +// - R12 104 &src[base], skip +// - R13 . &src[nextS], &src[len(src) - 8] +// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x +// - R15 112 candidate +// +// The second column (56, 64, etc) is the stack offset to spill the registers +// when calling other functions. We could pack this slightly tighter, but it's +// simpler to have a dedicated spill map independent of the function called. +// +// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. 
An +// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill +// local variables (registers) during calls gives 32768 + 56 + 64 = 32888. +TEXT ·encodeBlock(SB), 0, $32888-56 + MOVQ dst_base+0(FP), DI + MOVQ src_base+24(FP), SI + MOVQ src_len+32(FP), R14 + + // shift, tableSize := uint32(32-8), 1<<8 + MOVQ $24, CX + MOVQ $256, DX + +calcShift: + // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + // shift-- + // } + CMPQ DX, $16384 + JGE varTable + CMPQ DX, R14 + JGE varTable + SUBQ $1, CX + SHLQ $1, DX + JMP calcShift + +varTable: + // var table [maxTableSize]uint16 + // + // In the asm code, unlike the Go code, we can zero-initialize only the + // first tableSize elements. Each uint16 element is 2 bytes and each MOVOU + // writes 16 bytes, so we can do only tableSize/8 writes instead of the + // 2048 writes that would zero-initialize all of table's 32768 bytes. + SHRQ $3, DX + LEAQ table-32768(SP), BX + PXOR X0, X0 + +memclr: + MOVOU X0, 0(BX) + ADDQ $16, BX + SUBQ $1, DX + JNZ memclr + + // !!! DX = &src[0] + MOVQ SI, DX + + // sLimit := len(src) - inputMargin + MOVQ R14, R9 + SUBQ $15, R9 + + // !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't + // change for the rest of the function. 
+ MOVQ CX, 56(SP) + MOVQ DX, 64(SP) + MOVQ R9, 88(SP) + + // nextEmit := 0 + MOVQ DX, R10 + + // s := 1 + ADDQ $1, SI + + // nextHash := hash(load32(src, s), shift) + MOVL 0(SI), R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + +outer: + // for { etc } + + // skip := 32 + MOVQ $32, R12 + + // nextS := s + MOVQ SI, R13 + + // candidate := 0 + MOVQ $0, R15 + +inner0: + // for { etc } + + // s := nextS + MOVQ R13, SI + + // bytesBetweenHashLookups := skip >> 5 + MOVQ R12, R14 + SHRQ $5, R14 + + // nextS = s + bytesBetweenHashLookups + ADDQ R14, R13 + + // skip += bytesBetweenHashLookups + ADDQ R14, R12 + + // if nextS > sLimit { goto emitRemainder } + MOVQ R13, AX + SUBQ DX, AX + CMPQ AX, R9 + JA emitRemainder + + // candidate = int(table[nextHash]) + // XXX: MOVWQZX table-32768(SP)(R11*2), R15 + // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 + BYTE $0x4e + BYTE $0x0f + BYTE $0xb7 + BYTE $0x7c + BYTE $0x5c + BYTE $0x78 + + // table[nextHash] = uint16(s) + MOVQ SI, AX + SUBQ DX, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // nextHash = hash(load32(src, nextS), shift) + MOVL 0(R13), R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // if load32(src, s) != load32(src, candidate) { continue } break + MOVL 0(SI), AX + MOVL (DX)(R15*1), BX + CMPL AX, BX + JNE inner0 + +fourByteMatch: + // As per the encode_other.go code: + // + // A 4-byte match has been found. We'll later see etc. + + // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment + // on inputMargin in encode.go. + MOVQ SI, AX + SUBQ R10, AX + CMPQ AX, $16 + JLE emitLiteralFastPath + + // ---------------------------------------- + // Begin inline of the emitLiteral call. 
+ // + // d += emitLiteral(dst[d:], src[nextEmit:s]) + + MOVL AX, BX + SUBL $1, BX + + CMPL BX, $60 + JLT inlineEmitLiteralOneByte + CMPL BX, $256 + JLT inlineEmitLiteralTwoBytes + +inlineEmitLiteralThreeBytes: + MOVB $0xf4, 0(DI) + MOVW BX, 1(DI) + ADDQ $3, DI + JMP inlineEmitLiteralMemmove + +inlineEmitLiteralTwoBytes: + MOVB $0xf0, 0(DI) + MOVB BX, 1(DI) + ADDQ $2, DI + JMP inlineEmitLiteralMemmove + +inlineEmitLiteralOneByte: + SHLB $2, BX + MOVB BX, 0(DI) + ADDQ $1, DI + +inlineEmitLiteralMemmove: + // Spill local variables (registers) onto the stack; call; unspill. + // + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // DI, R10 and AX as arguments. + MOVQ DI, 0(SP) + MOVQ R10, 8(SP) + MOVQ AX, 16(SP) + ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)". + MOVQ SI, 72(SP) + MOVQ DI, 80(SP) + MOVQ R15, 112(SP) + CALL runtime·memmove(SB) + MOVQ 56(SP), CX + MOVQ 64(SP), DX + MOVQ 72(SP), SI + MOVQ 80(SP), DI + MOVQ 88(SP), R9 + MOVQ 112(SP), R15 + JMP inner1 + +inlineEmitLiteralEnd: + // End inline of the emitLiteral call. + // ---------------------------------------- + +emitLiteralFastPath: + // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". + MOVB AX, BX + SUBB $1, BX + SHLB $2, BX + MOVB BX, (DI) + ADDQ $1, DI + + // !!! Implement the copy from lit to dst as a 16-byte load and store. + // (Encode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only len(lit) bytes, but that's + // OK. Subsequent iterations will fix up the overrun. + // + // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + MOVOU 0(R10), X0 + MOVOU X0, 0(DI) + ADDQ AX, DI + +inner1: + // for { etc } + + // base := s + MOVQ SI, R12 + + // !!! 
offset := base - candidate + MOVQ R12, R11 + SUBQ R15, R11 + SUBQ DX, R11 + + // ---------------------------------------- + // Begin inline of the extendMatch call. + // + // s = extendMatch(src, candidate+4, s+4) + + // !!! R14 = &src[len(src)] + MOVQ src_len+32(FP), R14 + ADDQ DX, R14 + + // !!! R13 = &src[len(src) - 8] + MOVQ R14, R13 + SUBQ $8, R13 + + // !!! R15 = &src[candidate + 4] + ADDQ $4, R15 + ADDQ DX, R15 + + // !!! s += 4 + ADDQ $4, SI + +inlineExtendMatchCmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMPQ SI, R13 + JA inlineExtendMatchCmp1 + MOVQ (R15), AX + MOVQ (SI), BX + CMPQ AX, BX + JNE inlineExtendMatchBSF + ADDQ $8, R15 + ADDQ $8, SI + JMP inlineExtendMatchCmp8 + +inlineExtendMatchBSF: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. The BSF instruction finds the + // least significant 1 bit, the amd64 architecture is little-endian, and + // the shift by 3 converts a bit index to a byte index. + XORQ AX, BX + BSFQ BX, BX + SHRQ $3, BX + ADDQ BX, SI + JMP inlineExtendMatchEnd + +inlineExtendMatchCmp1: + // In src's tail, compare 1 byte at a time. + CMPQ SI, R14 + JAE inlineExtendMatchEnd + MOVB (R15), AX + MOVB (SI), BX + CMPB AX, BX + JNE inlineExtendMatchEnd + ADDQ $1, R15 + ADDQ $1, SI + JMP inlineExtendMatchCmp1 + +inlineExtendMatchEnd: + // End inline of the extendMatch call. + // ---------------------------------------- + + // ---------------------------------------- + // Begin inline of the emitCopy call. + // + // d += emitCopy(dst[d:], base-candidate, s-base) + + // !!! length := s - base + MOVQ SI, AX + SUBQ R12, AX + +inlineEmitCopyLoop0: + // for length >= 68 { etc } + CMPL AX, $68 + JLT inlineEmitCopyStep1 + + // Emit a length 64 copy, encoded as 3 bytes. 
+ MOVB $0xfe, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $64, AX + JMP inlineEmitCopyLoop0 + +inlineEmitCopyStep1: + // if length > 64 { etc } + CMPL AX, $64 + JLE inlineEmitCopyStep2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVB $0xee, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $60, AX + +inlineEmitCopyStep2: + // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } + CMPL AX, $12 + JGE inlineEmitCopyStep3 + CMPL R11, $2048 + JGE inlineEmitCopyStep3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(DI) + SHRL $8, R11 + SHLB $5, R11 + SUBB $4, AX + SHLB $2, AX + ORB AX, R11 + ORB $1, R11 + MOVB R11, 0(DI) + ADDQ $2, DI + JMP inlineEmitCopyEnd + +inlineEmitCopyStep3: + // Emit the remaining copy, encoded as 3 bytes. + SUBL $1, AX + SHLB $2, AX + ORB $2, AX + MOVB AX, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + +inlineEmitCopyEnd: + // End inline of the emitCopy call. + // ---------------------------------------- + + // nextEmit = s + MOVQ SI, R10 + + // if s >= sLimit { goto emitRemainder } + MOVQ SI, AX + SUBQ DX, AX + CMPQ AX, R9 + JAE emitRemainder + + // As per the encode_other.go code: + // + // We could immediately etc. 
+ + // x := load64(src, s-1) + MOVQ -1(SI), R14 + + // prevHash := hash(uint32(x>>0), shift) + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // table[prevHash] = uint16(s-1) + MOVQ SI, AX + SUBQ DX, AX + SUBQ $1, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // currHash := hash(uint32(x>>8), shift) + SHRQ $8, R14 + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // candidate = int(table[currHash]) + // XXX: MOVWQZX table-32768(SP)(R11*2), R15 + // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 + BYTE $0x4e + BYTE $0x0f + BYTE $0xb7 + BYTE $0x7c + BYTE $0x5c + BYTE $0x78 + + // table[currHash] = uint16(s) + ADDQ $1, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // if uint32(x>>8) == load32(src, candidate) { continue } + MOVL (DX)(R15*1), BX + CMPL R14, BX + JEQ inner1 + + // nextHash = hash(uint32(x>>16), shift) + SHRQ $8, R14 + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // s++ + ADDQ $1, SI + + // break out of the inner1 for loop, i.e. continue the outer loop. + JMP outer + +emitRemainder: + // if nextEmit < len(src) { etc } + MOVQ src_len+32(FP), AX + ADDQ DX, AX + CMPQ R10, AX + JEQ encodeBlockEnd + + // d += emitLiteral(dst[d:], src[nextEmit:]) + // + // Push args. + MOVQ DI, 0(SP) + MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative. + MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative. + MOVQ R10, 24(SP) + SUBQ R10, AX + MOVQ AX, 32(SP) + MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative. + + // Spill local variables (registers) onto the stack; call; unspill. + MOVQ DI, 80(SP) + CALL ·emitLiteral(SB) + MOVQ 80(SP), DI + + // Finish the "d +=" part of "d += emitLiteral(etc)". 
+ ADDQ 48(SP), DI + +encodeBlockEnd: + MOVQ dst_base+0(FP), AX + SUBQ AX, DI + MOVQ DI, d+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/encode_other.go b/vendor/github.com/golang/snappy/encode_other.go new file mode 100644 index 000000000..dbcae905e --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_other.go @@ -0,0 +1,238 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine !gc noasm + +package snappy + +func load32(b []byte, i int) uint32 { + b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load64(b []byte, i int) uint64 { + b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. +// +// It assumes that: +// dst is long enough to hold the encoded bytes +// 1 <= len(lit) && len(lit) <= 65536 +func emitLiteral(dst, lit []byte) int { + i, n := 0, uint(len(lit)-1) + switch { + case n < 60: + dst[0] = uint8(n)<<2 | tagLiteral + i = 1 + case n < 1<<8: + dst[0] = 60<<2 | tagLiteral + dst[1] = uint8(n) + i = 2 + default: + dst[0] = 61<<2 | tagLiteral + dst[1] = uint8(n) + dst[2] = uint8(n >> 8) + i = 3 + } + return i + copy(dst[i:], lit) +} + +// emitCopy writes a copy chunk and returns the number of bytes written. +// +// It assumes that: +// dst is long enough to hold the encoded bytes +// 1 <= offset && offset <= 65535 +// 4 <= length && length <= 65535 +func emitCopy(dst []byte, offset, length int) int { + i := 0 + // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. 
The + // threshold for this loop is a little higher (at 68 = 64 + 4), and the + // length emitted down below is a little lower (at 60 = 64 - 4), because + // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed + // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as + // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as + // 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a + // tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an + // encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1. + for length >= 68 { + // Emit a length 64 copy, encoded as 3 bytes. + dst[i+0] = 63<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= 64 + } + if length > 64 { + // Emit a length 60 copy, encoded as 3 bytes. + dst[i+0] = 59<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= 60 + } + if length >= 12 || offset >= 2048 { + // Emit the remaining copy, encoded as 3 bytes. + dst[i+0] = uint8(length-1)<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + return i + 3 + } + // Emit the remaining copy, encoded as 2 bytes. + dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 + dst[i+1] = uint8(offset) + return i + 2 +} + +// extendMatch returns the largest k such that k <= len(src) and that +// src[i:i+k-j] and src[j:k] have the same contents. +// +// It assumes that: +// 0 <= i && i < j && j <= len(src) +func extendMatch(src []byte, i, j int) int { + for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 { + } + return j +} + +func hash(u, shift uint32) uint32 { + return (u * 0x1e35a7bd) >> shift +} + +// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It +// assumes that the varint-encoded length of the decompressed bytes has already +// been written.
+// +// It also assumes that: +// len(dst) >= MaxEncodedLen(len(src)) && +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +func encodeBlock(dst, src []byte) (d int) { + // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. + // The table element type is uint16, as s < sLimit and sLimit < len(src) + // and len(src) <= maxBlockSize and maxBlockSize == 65536. + const ( + maxTableSize = 1 << 14 + // tableMask is redundant, but helps the compiler eliminate bounds + // checks. + tableMask = maxTableSize - 1 + ) + shift := uint32(32 - 8) + for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + shift-- + } + // In Go, all array elements are zero-initialized, so there is no advantage + // to a smaller tableSize per se. However, it matches the C++ algorithm, + // and in the asm versions of this code, we can get away with zeroing only + // the first tableSize elements. + var table [maxTableSize]uint16 + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + nextHash := hash(load32(src, s), shift) + + for { + // Copied from the C++ snappy implementation: + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned (or skipped), look at every third byte, etc.. When a match + // is found, immediately go back to looking at every byte. 
This is a + // small loss (~5% performance, ~0.1% density) for compressible data + // due to more bookkeeping, but for non-compressible data (such as + // JPEG) it's a huge win since the compressor quickly "realizes" the + // data is incompressible and doesn't bother looking for matches + // everywhere. + // + // The "skip" variable keeps track of how many bytes there are since + // the last match; dividing it by 32 (ie. right-shifting by five) gives + // the number of bytes to move ahead for each iteration. + skip := 32 + + nextS := s + candidate := 0 + for { + s = nextS + bytesBetweenHashLookups := skip >> 5 + nextS = s + bytesBetweenHashLookups + skip += bytesBetweenHashLookups + if nextS > sLimit { + goto emitRemainder + } + candidate = int(table[nextHash&tableMask]) + table[nextHash&tableMask] = uint16(s) + nextHash = hash(load32(src, nextS), shift) + if load32(src, s) == load32(src, candidate) { + break + } + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + d += emitLiteral(dst[d:], src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + + // Extend the 4-byte match as long as possible. 
+ // + // This is an inlined version of: + // s = extendMatch(src, candidate+4, s+4) + s += 4 + for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 { + } + + d += emitCopy(dst[d:], base-candidate, s-base) + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load64(src, s-1) + prevHash := hash(uint32(x>>0), shift) + table[prevHash&tableMask] = uint16(s - 1) + currHash := hash(uint32(x>>8), shift) + candidate = int(table[currHash&tableMask]) + table[currHash&tableMask] = uint16(s) + if uint32(x>>8) != load32(src, candidate) { + nextHash = hash(uint32(x>>16), shift) + s++ + break + } + } + } + +emitRemainder: + if nextEmit < len(src) { + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} diff --git a/vendor/github.com/golang/snappy/snappy.go b/vendor/github.com/golang/snappy/snappy.go new file mode 100644 index 000000000..ece692ea4 --- /dev/null +++ b/vendor/github.com/golang/snappy/snappy.go @@ -0,0 +1,98 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package snappy implements the Snappy compression format. It aims for very +// high speeds and reasonable compression. +// +// There are actually two Snappy formats: block and stream. They are related, +// but different: trying to decompress block-compressed data as a Snappy stream +// will fail, and vice versa. The block format is the Decode and Encode +// functions and the stream format is the Reader and Writer types. 
+// +// The block format, the more common case, is used when the complete size (the +// number of bytes) of the original data is known upfront, at the time +// compression starts. The stream format, also known as the framing format, is +// for when that isn't always true. +// +// The canonical, C++ implementation is at https://github.com/google/snappy and +// it only implements the block format. +package snappy // import "github.com/golang/snappy" + +import ( + "hash/crc32" +) + +/* +Each encoded block begins with the varint-encoded length of the decoded data, +followed by a sequence of chunks. Chunks begin and end on byte boundaries. The +first byte of each chunk is broken into its 2 least and 6 most significant bits +called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. +Zero means a literal tag. All other values mean a copy tag. + +For literal tags: + - If m < 60, the next 1 + m bytes are literal bytes. + - Otherwise, let n be the little-endian unsigned integer denoted by the next + m - 59 bytes. The next 1 + n bytes after that are literal bytes. + +For copy tags, length bytes are copied from offset bytes ago, in the style of +Lempel-Ziv compression algorithms. In particular: + - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). + The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 + of the offset. The next byte is bits 0-7 of the offset. + - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). + The length is 1 + m. The offset is the little-endian unsigned integer + denoted by the next 2 bytes. + - For l == 3, this tag is a legacy format that is no longer issued by most + encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in + [1, 65). The length is 1 + m. The offset is the little-endian unsigned + integer denoted by the next 4 bytes. 
+*/ +const ( + tagLiteral = 0x00 + tagCopy1 = 0x01 + tagCopy2 = 0x02 + tagCopy4 = 0x03 +) + +const ( + checksumSize = 4 + chunkHeaderSize = 4 + magicChunk = "\xff\x06\x00\x00" + magicBody + magicBody = "sNaPpY" + + // maxBlockSize is the maximum size of the input to encodeBlock. It is not + // part of the wire format per se, but some parts of the encoder assume + // that an offset fits into a uint16. + // + // Also, for the framing format (Writer type instead of Encode function), + // https://github.com/google/snappy/blob/master/framing_format.txt says + // that "the uncompressed data in a chunk must be no longer than 65536 + // bytes". + maxBlockSize = 65536 + + // maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is + // hard coded to be a const instead of a variable, so that obufLen can also + // be a const. Their equivalence is confirmed by + // TestMaxEncodedLenOfMaxBlockSize. + maxEncodedLenOfMaxBlockSize = 76490 + + obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize + obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize +) + +const ( + chunkTypeCompressedData = 0x00 + chunkTypeUncompressedData = 0x01 + chunkTypePadding = 0xfe + chunkTypeStreamIdentifier = 0xff +) + +var crcTable = crc32.MakeTable(crc32.Castagnoli) + +// crc implements the checksum specified in section 3 of +// https://github.com/google/snappy/blob/master/framing_format.txt +func crc(b []byte) uint32 { + c := crc32.Update(0, crcTable, b) + return uint32(c>>15|c<<17) + 0xa282ead8 +} diff --git a/vendor/github.com/id01/go-lz4/.gitignore b/vendor/github.com/id01/go-lz4/.gitignore new file mode 100644 index 000000000..be64db617 --- /dev/null +++ b/vendor/github.com/id01/go-lz4/.gitignore @@ -0,0 +1 @@ +/lz4-example/lz4-example diff --git a/vendor/github.com/id01/go-lz4/.travis.yml b/vendor/github.com/id01/go-lz4/.travis.yml new file mode 100644 index 000000000..d5870798f --- /dev/null +++ b/vendor/github.com/id01/go-lz4/.travis.yml @@ -0,0 +1,9 @@ +language: go + 
+go: + - 1.1 + - 1.2 + - 1.3 + - 1.4 + - 1.5 + - tip diff --git a/vendor/github.com/id01/go-lz4/LICENSE b/vendor/github.com/id01/go-lz4/LICENSE new file mode 100644 index 000000000..0545977b7 --- /dev/null +++ b/vendor/github.com/id01/go-lz4/LICENSE @@ -0,0 +1,24 @@ +Copyright 2011-2012 Branimir Karadzic. All rights reserved. +Copyright 2013 Damian Gryski. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/vendor/github.com/id01/go-lz4/README.md b/vendor/github.com/id01/go-lz4/README.md new file mode 100644 index 000000000..fd65d1590 --- /dev/null +++ b/vendor/github.com/id01/go-lz4/README.md @@ -0,0 +1,76 @@ +go-lz4 +====== + +go-lz4 is port of LZ4 lossless compression algorithm to Go. 
The original C code +is located at: + +https://github.com/Cyan4973/lz4 + +Status +------ +[![Build Status](https://secure.travis-ci.org/bkaradzic/go-lz4.png)](http://travis-ci.org/bkaradzic/go-lz4) +[![GoDoc](https://godoc.org/github.com/bkaradzic/go-lz4?status.png)](https://godoc.org/github.com/bkaradzic/go-lz4) + +Usage +----- + + go get github.com/bkaradzic/go-lz4 + + import "github.com/bkaradzic/go-lz4" + +The package name is `lz4` + +Notes +----- + +* go-lz4 saves a uint32 with the compressed block length at the beginning + of the encoded buffer. This may get in the way of interoperability with + other implementations. + +Alternative +----------- + +https://github.com/pierrec/lz4 + +Contributors +------------ + +Damian Gryski ([@dgryski](https://github.com/dgryski)) +Dustin Sallings ([@dustin](https://github.com/dustin)) + +Contact +------- + +[@bkaradzic](https://twitter.com/bkaradzic) +http://www.stuckingeometry.com + +Project page +https://github.com/bkaradzic/go-lz4 + +License +------- + +Copyright 2011-2012 Branimir Karadzic. All rights reserved. +Copyright 2013 Damian Gryski. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT +SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/vendor/github.com/id01/go-lz4/fuzz.go b/vendor/github.com/id01/go-lz4/fuzz.go new file mode 100644 index 000000000..e4989de9f --- /dev/null +++ b/vendor/github.com/id01/go-lz4/fuzz.go @@ -0,0 +1,23 @@ +// +build gofuzz + +package lz4 + +import "encoding/binary" + +func Fuzz(data []byte) int { + + if len(data) < 4 { + return 0 + } + + ln := binary.LittleEndian.Uint32(data) + if ln > (1 << 21) { + return 0 + } + + if _, err := Decode(nil, data); err != nil { + return 0 + } + + return 1 +} diff --git a/vendor/github.com/id01/go-lz4/reader.go b/vendor/github.com/id01/go-lz4/reader.go new file mode 100644 index 000000000..254745a37 --- /dev/null +++ b/vendor/github.com/id01/go-lz4/reader.go @@ -0,0 +1,196 @@ +/* + * Copyright 2011-2012 Branimir Karadzic. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +package lz4 + +import ( + "encoding/binary" + "errors" + "io" +) + +var ( + // ErrCorrupt indicates the input was corrupt + ErrCorrupt = errors.New("corrupt input") + ErrOutOfMemory = errors.New("out of memory") // Indicates dst wasn't large enough +) + +const ( + mlBits = 4 + mlMask = (1 << mlBits) - 1 + runBits = 8 - mlBits + runMask = (1 << runBits) - 1 +) + +type decoder struct { + src []byte + dst []byte + spos uint32 + dpos uint32 + ref uint32 +} + +func (d *decoder) readByte() (uint8, error) { + if int(d.spos) == len(d.src) { + return 0, io.EOF + } + b := d.src[d.spos] + d.spos++ + return b, nil +} + +func (d *decoder) getLen() (uint32, error) { + + length := uint32(0) + ln, err := d.readByte() + if err != nil { + return 0, ErrCorrupt + } + for ln == 255 { + length += 255 + ln, err = d.readByte() + if err != nil { + return 0, ErrCorrupt + } + } + length += uint32(ln) + + return length, nil +} + +func (d *decoder) cp(length, decr uint32) { + + if int(d.ref+length) < int(d.dpos) { + copy(d.dst[d.dpos:], d.dst[d.ref:d.ref+length]) + } else { + for ii := uint32(0); ii < length; ii++ { + d.dst[d.dpos+ii] = d.dst[d.ref+ii] + } + } + d.dpos += length + d.ref += length - decr +} + +func (d 
*decoder) finish(err error) error { + if err == io.EOF { + return nil + } + + return err +} + +// Decode returns the decoded form of src. The returned slice is a subslice of dst. +// Must have an input of dst with large enough length to hold the decompressed buffer. +func Decode(dst, src []byte) ([]byte, error) { + + if len(src) < 4 { + return nil, ErrCorrupt + } + + compressedLen := binary.LittleEndian.Uint32(src) + + if compressedLen == 0 { + return nil, ErrCorrupt + } + + if compressedLen > MaxInputSize { + return nil, ErrTooLarge + } + + d := decoder{src: src, dst: dst, spos: 4} + + decr := []uint32{0, 3, 2, 3} + + for { + code, err := d.readByte() + if err != nil { + return d.dst[:d.dpos], d.finish(err) + } + + length := uint32(code >> mlBits) + if length == runMask { + ln, err := d.getLen() + if err != nil { + return nil, ErrCorrupt + } + length += ln + } + + if int(d.spos+length) > len(d.src) || int(d.dpos+length) > len(d.dst) { + return nil, ErrOutOfMemory + } + + for ii := uint32(0); ii < length; ii++ { + d.dst[d.dpos+ii] = d.src[d.spos+ii] + } + + d.spos += length + d.dpos += length + + if int(d.spos) == len(d.src) { + return d.dst[:d.dpos], nil + } + + if int(d.spos+2) >= len(d.src) { + return nil, ErrCorrupt + } + + back := uint32(d.src[d.spos]) | uint32(d.src[d.spos+1])<<8 + + if back > d.dpos { + return nil, ErrCorrupt + } + + d.spos += 2 + d.ref = d.dpos - back + + length = uint32(code & mlMask) + if length == mlMask { + ln, err := d.getLen() + if err != nil { + return nil, ErrCorrupt + } + length += ln + } + + literal := d.dpos - d.ref + + if literal < 4 { + if int(d.dpos+4) > len(d.dst) { + return nil, ErrOutOfMemory + } + + d.cp(4, decr[literal]) + } else { + length += 4 + } + + if d.dpos+length > uint32(len(d.dst)) { + return nil, ErrOutOfMemory + } + + d.cp(length, 0) + } +} diff --git a/vendor/github.com/id01/go-lz4/writer.go b/vendor/github.com/id01/go-lz4/writer.go new file mode 100644 index 000000000..c69563919 --- /dev/null +++ 
b/vendor/github.com/id01/go-lz4/writer.go @@ -0,0 +1,190 @@ +/* + * Copyright 2011-2012 Branimir Karadzic. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package lz4 + +import ( + "encoding/binary" + "errors" +) + +const ( + minMatch = 4 + hashLog = 17 + hashTableSize = 1 << hashLog + hashShift = (minMatch * 8) - hashLog + incompressible uint32 = 128 + uninitHash = 0x88888888 + + // MaxInputSize is the largest buffer that can be compressed in a single block + MaxInputSize = 0x7E000000 +) + +var ( + // ErrTooLarge indicates the input buffer was too large + ErrTooLarge = errors.New("input too large") +) + +type encoder struct { + src []byte + dst []byte + hashTable []uint32 + pos uint32 + anchor uint32 + dpos uint32 +} + +// CompressBound returns the maximum length of an lz4 block, given its uncompressed length +func CompressBound(isize int) int { + if isize > MaxInputSize { + return 0 + } + return isize + ((isize) / 255) + 16 + 4 +} + +func (e *encoder) writeLiterals(length, mlLen, pos uint32) { + + ln := length + + var code byte + if ln > runMask-1 { + code = runMask + } else { + code = byte(ln) + } + + if mlLen > mlMask-1 { + e.dst[e.dpos] = (code << mlBits) + byte(mlMask) + } else { + e.dst[e.dpos] = (code << mlBits) + byte(mlLen) + } + e.dpos++ + + if code == runMask { + ln -= runMask + for ; ln > 254; ln -= 255 { + e.dst[e.dpos] = 255 + e.dpos++ + } + + e.dst[e.dpos] = byte(ln) + e.dpos++ + } + + for ii := uint32(0); ii < length; ii++ { + e.dst[e.dpos+ii] = e.src[pos+ii] + } + + e.dpos += length +} + +// Encode returns the encoded form of src. The returned array may be a +// sub-slice of dst if it was large enough to hold the entire output.
+func Encode(dst, src []byte) ([]byte, error) { + + if len(src) >= MaxInputSize { + return nil, ErrTooLarge + } + + if n := CompressBound(len(src)); len(dst) < n { + dst = make([]byte, n) + } + + e := encoder{src: src, dst: dst, hashTable: make([]uint32, hashTableSize)} + + e.dpos = 4 + + var ( + step uint32 = 1 + limit = incompressible + ) + + for { + if int(e.pos)+12 >= len(e.src) { + e.writeLiterals(uint32(len(e.src))-e.anchor, 0, e.anchor) + binary.LittleEndian.PutUint32(dst, uint32(e.dpos-4)) // Subtract 4 because the compressed size isn't counted as part of the block + return e.dst[:e.dpos], nil + } + + sequence := uint32(e.src[e.pos+3])<<24 | uint32(e.src[e.pos+2])<<16 | uint32(e.src[e.pos+1])<<8 | uint32(e.src[e.pos+0]) + + hash := (sequence * 2654435761) >> hashShift + ref := e.hashTable[hash] + uninitHash + e.hashTable[hash] = e.pos - uninitHash + + if ((e.pos-ref)>>16) != 0 || uint32(e.src[ref+3])<<24|uint32(e.src[ref+2])<<16|uint32(e.src[ref+1])<<8|uint32(e.src[ref+0]) != sequence { + if e.pos-e.anchor > limit { + limit <<= 1 + step += 1 + (step >> 2) + } + e.pos += step + continue + } + + if step > 1 { + e.hashTable[hash] = ref - uninitHash + e.pos -= step - 1 + step = 1 + continue + } + limit = incompressible + + ln := e.pos - e.anchor + back := e.pos - ref + + anchor := e.anchor + + e.pos += minMatch + ref += minMatch + e.anchor = e.pos + + for int(e.pos) < len(e.src)-5 && e.src[e.pos] == e.src[ref] { + e.pos++ + ref++ + } + + mlLen := e.pos - e.anchor + + e.writeLiterals(ln, mlLen, anchor) + e.dst[e.dpos] = uint8(back) + e.dst[e.dpos+1] = uint8(back >> 8) + e.dpos += 2 + + if mlLen > mlMask-1 { + mlLen -= mlMask + for mlLen > 254 { + mlLen -= 255 + + e.dst[e.dpos] = 255 + e.dpos++ + } + + e.dst[e.dpos] = byte(mlLen) + e.dpos++ + } + + e.anchor = e.pos + } +} diff --git a/vendor/modules.txt b/vendor/modules.txt index a7e8e167b..60ec89942 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -98,6 +98,8 @@ github.com/golang/protobuf/ptypes 
github.com/golang/protobuf/ptypes/any github.com/golang/protobuf/ptypes/duration github.com/golang/protobuf/ptypes/timestamp +# github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db +github.com/golang/snappy # github.com/google/go-querystring v1.0.0 github.com/google/go-querystring/query # github.com/googleapis/gax-go/v2 v2.0.5