diff --git a/go.mod b/go.mod index 8e0a3aebc..fea9d2334 100644 --- a/go.mod +++ b/go.mod @@ -7,27 +7,23 @@ require ( github.com/Azure/azure-storage-blob-go v0.8.0 github.com/Azure/go-autorest/autorest/adal v0.6.0 // indirect github.com/Unknwon/goconfig v0.0.0-20191126170842-860a72fb44fd - github.com/OneOfOne/xxhash v1.2.7 github.com/a8m/tree v0.0.0-20181222104329-6a0b80129de4 github.com/abbot/go-http-auth v0.4.0 github.com/anacrolix/dms v1.1.0 github.com/atotto/clipboard v0.1.2 github.com/aws/aws-sdk-go v1.29.9 github.com/billziss-gh/cgofuse v1.2.0 + github.com/buengese/xxh32 v1.0.1 github.com/djherbis/times v1.2.0 github.com/dropbox/dropbox-sdk-go-unofficial v5.6.0+incompatible - github.com/etcd-io/bbolt v1.3.3 github.com/gabriel-vasile/mimetype v1.0.2 - github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9 // indirect - github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db - github.com/google/go-cmp v0.3.1 // indirect github.com/google/go-querystring v1.0.0 // indirect github.com/gopherjs/gopherjs v0.0.0-20190812055157-5d271430af9f // indirect github.com/hanwen/go-fuse/v2 v2.0.3-0.20191108143333-152e6ac32d54 github.com/jlaffaye/ftp v0.0.0-20191218041957-e1b8fdd0dcc3 - github.com/id01/go-lz4 v1.0.3 github.com/jzelinskie/whirlpool v0.0.0-20170603002051-c19460b8caa6 github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect + github.com/klauspost/compress v1.10.1 github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect github.com/koofr/go-httpclient v0.0.0-20190818202018-e0dc8fd921dc github.com/koofr/go-koofrclient v0.0.0-20190724113126-8e5366da203a @@ -40,9 +36,10 @@ require ( github.com/ncw/swift v1.0.50 github.com/nsf/termbox-go v0.0.0-20200204031403-4d2b513ad8be github.com/okzk/sdnotify v0.0.0-20180710141335-d9becc38acbd - github.com/onsi/ginkgo v1.9.0 // indirect - github.com/onsi/gomega v1.6.0 // indirect + github.com/onsi/ginkgo v1.12.0 // indirect + github.com/onsi/gomega v1.9.0 // indirect github.com/patrickmn/go-cache v2.1.0+incompatible + github.com/pierrec/lz4 v2.4.1+incompatible github.com/pkg/errors v0.9.1 github.com/pkg/sftp v1.11.0 github.com/prometheus/client_golang v1.4.1 @@ -57,6 +54,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.5.1 github.com/t3rm1n4l/go-mega v0.0.0-20200117211730-79a813bb328d + github.com/ulikunitz/xz v0.5.7 github.com/xanzy/ssh-agent v0.2.1 github.com/youmark/pkcs8 v0.0.0-20191102193632-94c173a94d60 github.com/yunify/qingstor-sdk-go/v3 v3.2.0 diff --git a/go.sum b/go.sum index 888271ac2..5e6fad547 100644 --- a/go.sum +++ b/go.sum @@ -42,8 +42,7 @@ github.com/Azure/go-autorest/tracing v0.5.0/go.mod h1:r/s2XiOKccPW3HrqB+W0TQzfbt github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/OneOfOne/xxhash v1.2.7 h1:fzrmmkskv067ZQbd9wERNGuxckWw67dyzoMG62p7LMo= -github.com/OneOfOne/xxhash v1.2.7/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= +github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/RoaringBitmap/roaring v0.4.7/go.mod h1:8khRDP4HmeXns4xIj9oGrKSz7XTQiJx2zgh7AcNke4w= github.com/Unknwon/goconfig v0.0.0-20191126170842-860a72fb44fd h1:+CYOsXi89xOqBkj7CuEJjA2It+j+R3ngUZEydr6mtkw= github.com/Unknwon/goconfig v0.0.0-20191126170842-860a72fb44fd/go.mod h1:wngxua9XCNjvHjDiTiV26DaKDT+0c63QR6H5hjVUUxw= @@ -77,6 +76,8 @@ github.com/billziss-gh/cgofuse v1.2.0 h1:FMdQSygSBpD4yEPENJcmvfCdmNWMVkPLlD7wWdl github.com/billziss-gh/cgofuse v1.2.0/go.mod h1:LJjoaUojlVjgo5GQoEJTcJNqZJeRU0nCR84CyxKt2YM= github.com/bradfitz/iter v0.0.0-20140124041915-454541ec3da2/go.mod h1:PyRFw1Lt2wKX4ZVSQ2mk+PeDa1rxyObEDlApuIsUKuo= github.com/bradfitz/iter v0.0.0-20190303215204-33e6a9893b0c/go.mod h1:PyRFw1Lt2wKX4ZVSQ2mk+PeDa1rxyObEDlApuIsUKuo= +github.com/buengese/xxh32 v1.0.1 h1:aNZNg2XxotiTr6JD+R4bzmL1uzMZ2KEKvxyj4P1Z1Xw= +github.com/buengese/xxh32 v1.0.1/go.mod h1:Q5GTtu7m/GuqzCc8YZ0n+oetaGFwW7oy291HvqLTZFk= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= @@ -107,11 +108,13 @@ github.com/dropbox/dropbox-sdk-go-unofficial v5.6.0+incompatible/go.mod h1:lr+Lh github.com/dustin/go-humanize v0.0.0-20180421182945-02af3965c54e/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/frankban/quicktest v1.7.3 h1:kV0lw0TH1j1hozahVmcpFCsbV5hcS4ZalH+U7UoeTow= +github.com/frankban/quicktest v1.7.3/go.mod h1:V1d2J5pfxYH6EjBAgSK7YNXcXlTWxUHdE1sVDXkjnig= github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gabriel-vasile/mimetype v1.0.2 h1:GKCo1TUCg0pV0R4atTcaLv/9SI2W9xPgMySZxUxcJOE= github.com/gabriel-vasile/mimetype v1.0.2/go.mod h1:6CDPel/o/3/s4+bp6kIbsWATq8pmgOisOPG40CJa6To= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= github.com/glycerine/goconvey v0.0.0-20180728074245-46e3a41ad493/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= @@ -186,8 +189,6 @@ github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/huandu/xstrings v1.0.0/go.mod h1:4qWG/gcEcfX4z/mBDHJ++3ReCw9ibxbsNJbcucJdbSo= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/id01/go-lz4 v1.0.3 h1:D3krbAf5BppFsRSVa75yFo+JMxlTqFwuYpyHQAOgYds= -github.com/id01/go-lz4 v1.0.3/go.mod h1:G8scWkW5nw6fEwIREHZcWy3qddP/Go9IImmcit+bTzw= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jlaffaye/ftp v0.0.0-20190624084859-c1312a7102bf/go.mod h1:lli8NYPQOFy3O++YmYbqVgOcQ1JPCwdOy+5zSjKJ9qY= @@ -210,6 +211,8 @@ github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 h1:iQTw/8FWTuc7uia github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.10.1 h1:a/QY0o9S6wCi0XhxaMX/QmusicNUqCqFugR6WKPOSoQ= +github.com/klauspost/compress v1.10.1/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s= @@ -264,16 +267,19 @@ github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn github.com/okzk/sdnotify v0.0.0-20180710141335-d9becc38acbd h1:+iAPaTbi1gZpcpDwe/BW1fx7Xoesv69hLNGPheoyhBs= github.com/okzk/sdnotify v0.0.0-20180710141335-d9becc38acbd/go.mod h1:4soZNh0zW0LtYGdQ416i0jO0EIqMGcbtaspRS4BDvRQ= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.9.0 h1:SZjF721BByVj8QH636/8S2DnX4n0Re3SteMmw3N+tzc= -github.com/onsi/ginkgo v1.9.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/gomega v1.6.0 h1:8XTW0fcJZEq9q+Upcyws4JSGua2MFysCL5xkaSgHc+M= -github.com/onsi/gomega v1.6.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= +github.com/onsi/ginkgo v1.12.0 h1:Iw5WCbBcaAAd0fpRb1c9r5YCylv4XDoCSigm1zLevwU= +github.com/onsi/ginkgo v1.12.0/go.mod h1:oUhWkIvk5aDxtKvDDuw8gItl8pKl42LzjC9KZE0HfGg= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.9.0 h1:R1uwffexN6Pr340GtYRIdZmAiN4J+iw6WG4wog1DUXg= +github.com/onsi/gomega v1.9.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14 h1:XeOYlK9W1uCmhjJSsY78Mcuh7MVkNjTzmHx1yBzizSU= github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14/go.mod h1:jVblp62SafmidSkvWrXyxAme3gaTfEtWwRPGz5cpvHg= github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= +github.com/pierrec/lz4 v2.4.1+incompatible h1:mFe7ttWaflA46Mhqh+jUfjp2qTbPYxLB2/OyBppH9dg= +github.com/pierrec/lz4 v2.4.1+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -364,6 +370,8 @@ github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1 github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c h1:u6SKchux2yDvFQnDHS3lPnIRmfVJ5Sxy3ao2SIdysLQ= github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c/go.mod h1:hzIxponao9Kjc7aWznkXaL4U4TWaDSs8zcsY4Ka08nM= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= +github.com/ulikunitz/xz v0.5.7 h1:YvTNdFzX6+W5m9msiYg/zpkSURPPtOlzbqYjrFn7Yt4= +github.com/ulikunitz/xz v0.5.7/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/willf/bitset v1.1.9/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= github.com/xanzy/ssh-agent v0.2.1 h1:TCbipTQL2JiiCprBWx9frJ2eJlCYT00NmctrHxVAr70= github.com/xanzy/ssh-agent v0.2.1/go.mod h1:mLlQY/MoOhWBj+gOGMQkOeiEvkx+8pJSI+0Bx9h2kr4= @@ -481,6 +489,7 @@ golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191112214154-59a1497f0cea/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191210023423-ac6580df4449 h1:gSbV7h1NRL2G1xTg/owz62CST1oJBmxy4QpMMregXVQ= golang.org/x/sys v0.0.0-20191210023423-ac6580df4449/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -583,6 +592,7 @@ gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLks gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= @@ -595,6 +605,8 @@ gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.5 h1:ymVxjfMaHvXD8RqPRmzHHsB3VvucivSkIAvJFDI5O3c= gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/vendor/github.com/OneOfOne/xxhash/.gitignore b/vendor/github.com/OneOfOne/xxhash/.gitignore deleted file mode 100644 index f4faa7f8f..000000000 --- a/vendor/github.com/OneOfOne/xxhash/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -*.txt -*.pprof -cmap2/ -cache/ diff --git a/vendor/github.com/OneOfOne/xxhash/.travis.yml b/vendor/github.com/OneOfOne/xxhash/.travis.yml deleted file mode 100644 index 1c6dc55bc..000000000 --- a/vendor/github.com/OneOfOne/xxhash/.travis.yml +++ /dev/null @@ -1,13 +0,0 @@ -language: go -sudo: false - -go: - - "1.10" - - "1.11" - - "1.12" - - master - -script: - - go test -tags safe ./... - - go test ./... - - diff --git a/vendor/github.com/OneOfOne/xxhash/LICENSE b/vendor/github.com/OneOfOne/xxhash/LICENSE deleted file mode 100644 index 9e30b4f34..000000000 --- a/vendor/github.com/OneOfOne/xxhash/LICENSE +++ /dev/null @@ -1,187 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. diff --git a/vendor/github.com/OneOfOne/xxhash/README.md b/vendor/github.com/OneOfOne/xxhash/README.md deleted file mode 100644 index 23174eb56..000000000 --- a/vendor/github.com/OneOfOne/xxhash/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# xxhash [![GoDoc](https://godoc.org/github.com/OneOfOne/xxhash?status.svg)](https://godoc.org/github.com/OneOfOne/xxhash) [![Build Status](https://travis-ci.org/OneOfOne/xxhash.svg?branch=master)](https://travis-ci.org/OneOfOne/xxhash) [![Coverage](https://gocover.io/_badge/github.com/OneOfOne/xxhash)](https://gocover.io/github.com/OneOfOne/xxhash) - -This is a native Go implementation of the excellent [xxhash](https://github.com/Cyan4973/xxHash)* algorithm, an extremely fast non-cryptographic Hash algorithm, working at speeds close to RAM limits. - -* The C implementation is ([Copyright](https://github.com/Cyan4973/xxHash/blob/master/LICENSE) (c) 2012-2014, Yann Collet) - -## Install - - go get github.com/OneOfOne/xxhash - -## Features - -* On Go 1.7+ the pure go version is faster than CGO for all inputs. -* Supports ChecksumString{32,64} xxhash{32,64}.WriteString, which uses no copies when it can, falls back to copy on appengine. -* The native version falls back to a less optimized version on appengine due to the lack of unsafe. -* Almost as fast as the mostly pure assembly version written by the brilliant [cespare](https://github.com/cespare/xxhash), while also supporting seeds. -* To manually toggle the appengine version build with `-tags safe`. - -## Benchmark - -### Core i7-4790 @ 3.60GHz, Linux 4.12.6-1-ARCH (64bit), Go tip (+ff90f4af66 2017-08-19) - -```bash -➤ go test -bench '64' -count 5 -tags cespare | benchstat /dev/stdin -name time/op - -# https://github.com/cespare/xxhash -XXSum64Cespare/Func-8 160ns ± 2% -XXSum64Cespare/Struct-8 173ns ± 1% -XXSum64ShortCespare/Func-8 6.78ns ± 1% -XXSum64ShortCespare/Struct-8 19.6ns ± 2% - -# this package (default mode, using unsafe) -XXSum64/Func-8 170ns ± 1% -XXSum64/Struct-8 182ns ± 1% -XXSum64Short/Func-8 13.5ns ± 3% -XXSum64Short/Struct-8 20.4ns ± 0% - -# this package (appengine, *not* using unsafe) -XXSum64/Func-8 241ns ± 5% -XXSum64/Struct-8 243ns ± 6% -XXSum64Short/Func-8 15.2ns ± 2% -XXSum64Short/Struct-8 23.7ns ± 5% - -CRC64ISO-8 1.23µs ± 1% -CRC64ISOString-8 2.71µs ± 4% -CRC64ISOShort-8 22.2ns ± 3% - -Fnv64-8 2.34µs ± 1% -Fnv64Short-8 74.7ns ± 8% -# -``` - -## Usage - -```go - h := xxhash.New64() - // r, err := os.Open("......") - // defer f.Close() - r := strings.NewReader(F) - io.Copy(h, r) - fmt.Println("xxhash.Backend:", xxhash.Backend) - fmt.Println("File checksum:", h.Sum64()) -``` - -[playground](http://play.golang.org/p/rhRN3RdQyd) - -## TODO - -* Rewrite the 32bit version to be more optimized. -* General cleanup as the Go inliner gets smarter. - -## License - -This project is released under the Apache v2. licence. See [LICENCE](LICENCE) for more details. diff --git a/vendor/github.com/OneOfOne/xxhash/go.mod b/vendor/github.com/OneOfOne/xxhash/go.mod deleted file mode 100644 index c6da85e0a..000000000 --- a/vendor/github.com/OneOfOne/xxhash/go.mod +++ /dev/null @@ -1,3 +0,0 @@ -module github.com/OneOfOne/xxhash - -go 1.11 diff --git a/vendor/github.com/OneOfOne/xxhash/xxhash.go b/vendor/github.com/OneOfOne/xxhash/xxhash.go deleted file mode 100644 index 634150609..000000000 --- a/vendor/github.com/OneOfOne/xxhash/xxhash.go +++ /dev/null @@ -1,194 +0,0 @@ -package xxhash - -import "hash" - -const ( - prime32x1 uint32 = 2654435761 - prime32x2 uint32 = 2246822519 - prime32x3 uint32 = 3266489917 - prime32x4 uint32 = 668265263 - prime32x5 uint32 = 374761393 - - prime64x1 uint64 = 11400714785074694791 - prime64x2 uint64 = 14029467366897019727 - prime64x3 uint64 = 1609587929392839161 - prime64x4 uint64 = 9650029242287828579 - prime64x5 uint64 = 2870177450012600261 - - maxInt32 int32 = (1<<31 - 1) - - // precomputed zero Vs for seed 0 - zero64x1 = 0x60ea27eeadc0b5d6 - zero64x2 = 0xc2b2ae3d27d4eb4f - zero64x3 = 0x0 - zero64x4 = 0x61c8864e7a143579 -) - -func NewHash32() hash.Hash { return New32() } -func NewHash64() hash.Hash { return New64() } - -// Checksum32 returns the checksum of the input data with the seed set to 0. -func Checksum32(in []byte) uint32 { - return Checksum32S(in, 0) -} - -// ChecksumString32 returns the checksum of the input data, without creating a copy, with the seed set to 0. -func ChecksumString32(s string) uint32 { - return ChecksumString32S(s, 0) -} - -type XXHash32 struct { - mem [16]byte - ln, memIdx int32 - v1, v2, v3, v4 uint32 - seed uint32 -} - -// Size returns the number of bytes Sum will return. -func (xx *XXHash32) Size() int { - return 4 -} - -// BlockSize returns the hash's underlying block size. -// The Write method must be able to accept any amount -// of data, but it may operate more efficiently if all writes -// are a multiple of the block size. -func (xx *XXHash32) BlockSize() int { - return 16 -} - -// NewS32 creates a new hash.Hash32 computing the 32bit xxHash checksum starting with the specific seed. -func NewS32(seed uint32) (xx *XXHash32) { - xx = &XXHash32{ - seed: seed, - } - xx.Reset() - return -} - -// New32 creates a new hash.Hash32 computing the 32bit xxHash checksum starting with the seed set to 0. -func New32() *XXHash32 { - return NewS32(0) -} - -func (xx *XXHash32) Reset() { - xx.v1 = xx.seed + prime32x1 + prime32x2 - xx.v2 = xx.seed + prime32x2 - xx.v3 = xx.seed - xx.v4 = xx.seed - prime32x1 - xx.ln, xx.memIdx = 0, 0 -} - -// Sum appends the current hash to b and returns the resulting slice. -// It does not change the underlying hash state. -func (xx *XXHash32) Sum(in []byte) []byte { - s := xx.Sum32() - return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s)) -} - -// Checksum64 an alias for Checksum64S(in, 0) -func Checksum64(in []byte) uint64 { - return Checksum64S(in, 0) -} - -// ChecksumString64 returns the checksum of the input data, without creating a copy, with the seed set to 0. -func ChecksumString64(s string) uint64 { - return ChecksumString64S(s, 0) -} - -type XXHash64 struct { - v1, v2, v3, v4 uint64 - seed uint64 - ln uint64 - mem [32]byte - memIdx int8 -} - -// Size returns the number of bytes Sum will return. -func (xx *XXHash64) Size() int { - return 8 -} - -// BlockSize returns the hash's underlying block size. -// The Write method must be able to accept any amount -// of data, but it may operate more efficiently if all writes -// are a multiple of the block size. -func (xx *XXHash64) BlockSize() int { - return 32 -} - -// NewS64 creates a new hash.Hash64 computing the 64bit xxHash checksum starting with the specific seed. -func NewS64(seed uint64) (xx *XXHash64) { - xx = &XXHash64{ - seed: seed, - } - xx.Reset() - return -} - -// New64 creates a new hash.Hash64 computing the 64bit xxHash checksum starting with the seed set to 0x0. -func New64() *XXHash64 { - return NewS64(0) -} - -func (xx *XXHash64) Reset() { - xx.ln, xx.memIdx = 0, 0 - xx.v1, xx.v2, xx.v3, xx.v4 = resetVs64(xx.seed) -} - -// Sum appends the current hash to b and returns the resulting slice. -// It does not change the underlying hash state. -func (xx *XXHash64) Sum(in []byte) []byte { - s := xx.Sum64() - return append(in, byte(s>>56), byte(s>>48), byte(s>>40), byte(s>>32), byte(s>>24), byte(s>>16), byte(s>>8), byte(s)) -} - -// force the compiler to use ROTL instructions - -func rotl32_1(x uint32) uint32 { return (x << 1) | (x >> (32 - 1)) } -func rotl32_7(x uint32) uint32 { return (x << 7) | (x >> (32 - 7)) } -func rotl32_11(x uint32) uint32 { return (x << 11) | (x >> (32 - 11)) } -func rotl32_12(x uint32) uint32 { return (x << 12) | (x >> (32 - 12)) } -func rotl32_13(x uint32) uint32 { return (x << 13) | (x >> (32 - 13)) } -func rotl32_17(x uint32) uint32 { return (x << 17) | (x >> (32 - 17)) } -func rotl32_18(x uint32) uint32 { return (x << 18) | (x >> (32 - 18)) } - -func rotl64_1(x uint64) uint64 { return (x << 1) | (x >> (64 - 1)) } -func rotl64_7(x uint64) uint64 { return (x << 7) | (x >> (64 - 7)) } -func rotl64_11(x uint64) uint64 { return (x << 11) | (x >> (64 - 11)) } -func rotl64_12(x uint64) uint64 { return (x << 12) | (x >> (64 - 12)) } -func rotl64_18(x uint64) uint64 { return (x << 18) | (x >> (64 - 18)) } -func rotl64_23(x uint64) uint64 { return (x << 23) | (x >> (64 - 23)) } -func rotl64_27(x uint64) uint64 { return (x << 27) | (x >> (64 - 27)) } -func rotl64_31(x uint64) uint64 { return (x << 31) | (x >> (64 - 31)) } - -func mix64(h uint64) uint64 { - h ^= h >> 33 - h *= prime64x2 - h ^= h >> 29 - h *= prime64x3 - h ^= h >> 32 - return h -} - -func resetVs64(seed uint64) (v1, v2, v3, v4 uint64) { - if seed == 0 { - return zero64x1, zero64x2, zero64x3, zero64x4 - } - return (seed + prime64x1 + prime64x2), (seed + prime64x2), (seed), (seed - prime64x1) -} - -// borrowed from cespare -func round64(h, v uint64) uint64 { - h += v * prime64x2 - h = rotl64_31(h) - h *= prime64x1 - return h -} - -func mergeRound64(h, v uint64) uint64 { - v = round64(0, v) - h ^= v - h = h*prime64x1 + prime64x4 - return h -} diff --git a/vendor/github.com/OneOfOne/xxhash/xxhash_go17.go b/vendor/github.com/OneOfOne/xxhash/xxhash_go17.go deleted file mode 100644 index ae48e0c5c..000000000 --- a/vendor/github.com/OneOfOne/xxhash/xxhash_go17.go +++ /dev/null @@ -1,161 +0,0 @@ -package xxhash - -func u32(in []byte) uint32 { - return uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 -} - -func u64(in []byte) uint64 { - return uint64(in[0]) | uint64(in[1])<<8 | uint64(in[2])<<16 | uint64(in[3])<<24 | uint64(in[4])<<32 | uint64(in[5])<<40 | uint64(in[6])<<48 | uint64(in[7])<<56 -} - -// Checksum32S returns the checksum of the input bytes with the specific seed. -func Checksum32S(in []byte, seed uint32) (h uint32) { - var i int - - if len(in) > 15 { - var ( - v1 = seed + prime32x1 + prime32x2 - v2 = seed + prime32x2 - v3 = seed + 0 - v4 = seed - prime32x1 - ) - for ; i < len(in)-15; i += 16 { - in := in[i : i+16 : len(in)] - v1 += u32(in[0:4:len(in)]) * prime32x2 - v1 = rotl32_13(v1) * prime32x1 - - v2 += u32(in[4:8:len(in)]) * prime32x2 - v2 = rotl32_13(v2) * prime32x1 - - v3 += u32(in[8:12:len(in)]) * prime32x2 - v3 = rotl32_13(v3) * prime32x1 - - v4 += u32(in[12:16:len(in)]) * prime32x2 - v4 = rotl32_13(v4) * prime32x1 - } - - h = rotl32_1(v1) + rotl32_7(v2) + rotl32_12(v3) + rotl32_18(v4) - - } else { - h = seed + prime32x5 - } - - h += uint32(len(in)) - for ; i <= len(in)-4; i += 4 { - in := in[i : i+4 : len(in)] - h += u32(in[0:4:len(in)]) * prime32x3 - h = rotl32_17(h) * prime32x4 - } - - for ; i < len(in); i++ { - h += uint32(in[i]) * prime32x5 - h = rotl32_11(h) * prime32x1 - } - - h ^= h >> 15 - h *= prime32x2 - h ^= h >> 13 - h *= prime32x3 - h ^= h >> 16 - - return -} - -func (xx *XXHash32) Write(in []byte) (n int, err error) { - i, ml := 0, int(xx.memIdx) - n = len(in) - xx.ln += int32(n) - - if d := 16 - ml; ml > 0 && ml+len(in) > 16 { - xx.memIdx += int32(copy(xx.mem[xx.memIdx:], in[:d])) - ml, in = 16, in[d:len(in):len(in)] - } else if ml+len(in) < 16 { - xx.memIdx += int32(copy(xx.mem[xx.memIdx:], in)) - return - } - - if ml > 0 { - i += 16 - ml - xx.memIdx += int32(copy(xx.mem[xx.memIdx:len(xx.mem):len(xx.mem)], in)) - in := xx.mem[:16:len(xx.mem)] - - xx.v1 += u32(in[0:4:len(in)]) * prime32x2 - xx.v1 = rotl32_13(xx.v1) * prime32x1 - - xx.v2 += u32(in[4:8:len(in)]) * prime32x2 - xx.v2 = rotl32_13(xx.v2) * prime32x1 - - xx.v3 += u32(in[8:12:len(in)]) * prime32x2 - xx.v3 = rotl32_13(xx.v3) * prime32x1 - - xx.v4 += u32(in[12:16:len(in)]) * prime32x2 - xx.v4 = rotl32_13(xx.v4) * prime32x1 - - xx.memIdx = 0 - } - - for ; i <= len(in)-16; i += 16 { - in := in[i : i+16 : len(in)] - xx.v1 += u32(in[0:4:len(in)]) * prime32x2 - xx.v1 = rotl32_13(xx.v1) * prime32x1 - - xx.v2 += u32(in[4:8:len(in)]) * prime32x2 - xx.v2 = rotl32_13(xx.v2) * prime32x1 - - xx.v3 += u32(in[8:12:len(in)]) * prime32x2 - xx.v3 = rotl32_13(xx.v3) * prime32x1 - - xx.v4 += u32(in[12:16:len(in)]) * prime32x2 - xx.v4 = rotl32_13(xx.v4) * prime32x1 - } - - if len(in)-i != 0 { - xx.memIdx += int32(copy(xx.mem[xx.memIdx:], in[i:len(in):len(in)])) - } - - return -} - -func (xx *XXHash32) Sum32() (h uint32) { - var i int32 - if xx.ln > 15 { - h = rotl32_1(xx.v1) + rotl32_7(xx.v2) + rotl32_12(xx.v3) + rotl32_18(xx.v4) - } else { - h = xx.seed + prime32x5 - } - - h += uint32(xx.ln) - - if xx.memIdx > 0 { - for ; i < xx.memIdx-3; i += 4 { - in := xx.mem[i : i+4 : len(xx.mem)] - h += u32(in[0:4:len(in)]) * prime32x3 - h = rotl32_17(h) * prime32x4 - } - - for ; i < xx.memIdx; i++ { - h += uint32(xx.mem[i]) * prime32x5 - h = rotl32_11(h) * prime32x1 - } - } - h ^= h >> 15 - h *= prime32x2 - h ^= h >> 13 - h *= prime32x3 - h ^= h >> 16 - - return -} - -// Checksum64S returns the 64bit xxhash checksum for a single input -func Checksum64S(in []byte, seed uint64) uint64 { - if len(in) == 0 && seed == 0 { - return 0xef46db3751d8e999 - } - - if len(in) > 31 { - return checksum64(in, seed) - } - - return checksum64Short(in, seed) -} diff --git a/vendor/github.com/OneOfOne/xxhash/xxhash_safe.go b/vendor/github.com/OneOfOne/xxhash/xxhash_safe.go deleted file mode 100644 index e92ec29e0..000000000 --- a/vendor/github.com/OneOfOne/xxhash/xxhash_safe.go +++ /dev/null @@ -1,183 +0,0 @@ -// +build appengine safe ppc64le ppc64be mipsle mips s390x - -package xxhash - -// Backend returns the current version of xxhash being used. -const Backend = "GoSafe" - -func ChecksumString32S(s string, seed uint32) uint32 { - return Checksum32S([]byte(s), seed) -} - -func (xx *XXHash32) WriteString(s string) (int, error) { - if len(s) == 0 { - return 0, nil - } - return xx.Write([]byte(s)) -} - -func ChecksumString64S(s string, seed uint64) uint64 { - return Checksum64S([]byte(s), seed) -} - -func (xx *XXHash64) WriteString(s string) (int, error) { - if len(s) == 0 { - return 0, nil - } - return xx.Write([]byte(s)) -} - -func checksum64(in []byte, seed uint64) (h uint64) { - var ( - v1, v2, v3, v4 = resetVs64(seed) - - i int - ) - - for ; i < len(in)-31; i += 32 { - in := in[i : i+32 : len(in)] - v1 = round64(v1, u64(in[0:8:len(in)])) - v2 = round64(v2, u64(in[8:16:len(in)])) - v3 = round64(v3, u64(in[16:24:len(in)])) - v4 = round64(v4, u64(in[24:32:len(in)])) - } - - h = rotl64_1(v1) + rotl64_7(v2) + rotl64_12(v3) + rotl64_18(v4) - - h = mergeRound64(h, v1) - h = mergeRound64(h, v2) - h = mergeRound64(h, v3) - h = mergeRound64(h, v4) - - h += uint64(len(in)) - - for ; i < len(in)-7; i += 8 { - h ^= round64(0, u64(in[i:len(in):len(in)])) - h = rotl64_27(h)*prime64x1 + prime64x4 - } - - for ; i < len(in)-3; i += 4 { - h ^= uint64(u32(in[i:len(in):len(in)])) * prime64x1 - h = rotl64_23(h)*prime64x2 + prime64x3 - } - - for ; i < len(in); i++ { - h ^= uint64(in[i]) * prime64x5 - h = rotl64_11(h) * prime64x1 - } - - return mix64(h) -} - -func checksum64Short(in []byte, seed uint64) uint64 { - var ( - h = seed + prime64x5 + uint64(len(in)) - i int - ) - - for ; i < len(in)-7; i += 8 { - k := u64(in[i : i+8 : len(in)]) - h ^= round64(0, k) - h = rotl64_27(h)*prime64x1 + prime64x4 - } - - for ; i < len(in)-3; i += 4 { - h ^= uint64(u32(in[i:i+4:len(in)])) * prime64x1 - h = rotl64_23(h)*prime64x2 + prime64x3 - } - - for ; i < len(in); i++ { - h ^= uint64(in[i]) * prime64x5 - h = rotl64_11(h) * prime64x1 - } - - return mix64(h) -} - -func (xx *XXHash64) Write(in []byte) (n int, err error) { - var ( - ml = int(xx.memIdx) - d = 32 - ml - ) - - n = len(in) - xx.ln += uint64(n) - - if ml+len(in) < 32 { - xx.memIdx += int8(copy(xx.mem[xx.memIdx:len(xx.mem):len(xx.mem)], in)) - return - } - - i, v1, v2, v3, v4 := 0, xx.v1, xx.v2, xx.v3, xx.v4 - if ml > 0 && ml+len(in) > 32 { - xx.memIdx += int8(copy(xx.mem[xx.memIdx:len(xx.mem):len(xx.mem)], in[:d:len(in)])) - in = in[d:len(in):len(in)] - - in := xx.mem[0:32:len(xx.mem)] - - v1 = round64(v1, u64(in[0:8:len(in)])) - v2 = round64(v2, u64(in[8:16:len(in)])) - v3 = round64(v3, u64(in[16:24:len(in)])) - v4 = round64(v4, u64(in[24:32:len(in)])) - - xx.memIdx = 0 - } - - for ; i < len(in)-31; i += 32 { - in := in[i : i+32 : len(in)] - v1 = round64(v1, u64(in[0:8:len(in)])) - v2 = round64(v2, u64(in[8:16:len(in)])) - v3 = round64(v3, u64(in[16:24:len(in)])) - v4 = round64(v4, u64(in[24:32:len(in)])) - } - - if len(in)-i != 0 { - xx.memIdx += int8(copy(xx.mem[xx.memIdx:], in[i:len(in):len(in)])) - } - - xx.v1, xx.v2, xx.v3, xx.v4 = v1, v2, v3, v4 - - return -} - -func (xx *XXHash64) Sum64() (h uint64) { - var i int - if xx.ln > 31 { - v1, v2, v3, v4 := xx.v1, xx.v2, xx.v3, xx.v4 - h = rotl64_1(v1) + rotl64_7(v2) + rotl64_12(v3) + rotl64_18(v4) - - h = mergeRound64(h, v1) - h = mergeRound64(h, v2) - h = mergeRound64(h, v3) - h = mergeRound64(h, v4) - } else { - h = xx.seed + prime64x5 - } - - h += uint64(xx.ln) - if xx.memIdx > 0 { - in := xx.mem[:xx.memIdx] - for ; i < int(xx.memIdx)-7; i += 8 { - in := in[i : i+8 : len(in)] - k := u64(in[0:8:len(in)]) - k *= prime64x2 - k = rotl64_31(k) - k *= prime64x1 - h ^= k - h = rotl64_27(h)*prime64x1 + prime64x4 - } - - for ; i < int(xx.memIdx)-3; i += 4 { - in := in[i : i+4 : len(in)] - h ^= uint64(u32(in[0:4:len(in)])) * prime64x1 - h = rotl64_23(h)*prime64x2 + prime64x3 - } - - for ; i < int(xx.memIdx); i++ { - h ^= uint64(in[i]) * prime64x5 - h = rotl64_11(h) * prime64x1 - } - } - - return mix64(h) -} diff --git a/vendor/github.com/OneOfOne/xxhash/xxhash_unsafe.go b/vendor/github.com/OneOfOne/xxhash/xxhash_unsafe.go deleted file mode 100644 index 9c67f8e71..000000000 --- a/vendor/github.com/OneOfOne/xxhash/xxhash_unsafe.go +++ /dev/null @@ -1,241 +0,0 @@ -// +build !safe -// +build !appengine -// +build !ppc64le -// +build !mipsle -// +build !ppc64be -// +build !mips -// +build !s390x - -package xxhash - -import ( - "reflect" - "unsafe" -) - -// Backend returns the current version of xxhash being used. -const Backend = "GoUnsafe" - -// ChecksumString32S returns the checksum of the input data, without creating a copy, with the specific seed. -func ChecksumString32S(s string, seed uint32) uint32 { - if len(s) == 0 { - return Checksum32S(nil, seed) - } - ss := (*reflect.StringHeader)(unsafe.Pointer(&s)) - return Checksum32S((*[maxInt32]byte)(unsafe.Pointer(ss.Data))[:len(s):len(s)], seed) -} - -func (xx *XXHash32) WriteString(s string) (int, error) { - if len(s) == 0 { - return 0, nil - } - - ss := (*reflect.StringHeader)(unsafe.Pointer(&s)) - return xx.Write((*[maxInt32]byte)(unsafe.Pointer(ss.Data))[:len(s):len(s)]) -} - -// ChecksumString64S returns the checksum of the input data, without creating a copy, with the specific seed. -func ChecksumString64S(s string, seed uint64) uint64 { - if len(s) == 0 { - return Checksum64S(nil, seed) - } - - ss := (*reflect.StringHeader)(unsafe.Pointer(&s)) - return Checksum64S((*[maxInt32]byte)(unsafe.Pointer(ss.Data))[:len(s):len(s)], seed) -} - -func (xx *XXHash64) WriteString(s string) (int, error) { - if len(s) == 0 { - return 0, nil - } - ss := (*reflect.StringHeader)(unsafe.Pointer(&s)) - return xx.Write((*[maxInt32]byte)(unsafe.Pointer(ss.Data))[:len(s):len(s)]) -} - -//go:nocheckptr -func checksum64(in []byte, seed uint64) uint64 { - var ( - wordsLen = len(in) >> 3 - words = ((*[maxInt32 / 8]uint64)(unsafe.Pointer(&in[0])))[:wordsLen:wordsLen] - - h uint64 = prime64x5 - - v1, v2, v3, v4 = resetVs64(seed) - - i int - ) - - for ; i < len(words)-3; i += 4 { - words := (*[4]uint64)(unsafe.Pointer(&words[i])) - - v1 = round64(v1, words[0]) - v2 = round64(v2, words[1]) - v3 = round64(v3, words[2]) - v4 = round64(v4, words[3]) - } - - h = rotl64_1(v1) + rotl64_7(v2) + rotl64_12(v3) + rotl64_18(v4) - - h = mergeRound64(h, v1) - h = mergeRound64(h, v2) - h = mergeRound64(h, v3) - h = mergeRound64(h, v4) - - h += uint64(len(in)) - - for _, k := range words[i:] { - h ^= round64(0, k) - h = rotl64_27(h)*prime64x1 + prime64x4 - } - - if in = in[wordsLen<<3 : len(in) : len(in)]; len(in) > 3 { - words := (*[1]uint32)(unsafe.Pointer(&in[0])) - h ^= uint64(words[0]) * prime64x1 - h = rotl64_23(h)*prime64x2 + prime64x3 - - in = in[4:len(in):len(in)] - } - - for _, b := range in { - h ^= uint64(b) * prime64x5 - h = rotl64_11(h) * prime64x1 - } - - return mix64(h) -} - -//go:nocheckptr -func checksum64Short(in []byte, seed uint64) uint64 { - var ( - h = seed + prime64x5 + uint64(len(in)) - i int - ) - - if len(in) > 7 { - var ( - wordsLen = len(in) >> 3 - words = ((*[maxInt32 / 8]uint64)(unsafe.Pointer(&in[0])))[:wordsLen:wordsLen] - ) - - for i := range words { - h ^= round64(0, words[i]) - h = rotl64_27(h)*prime64x1 + prime64x4 - } - - i = wordsLen << 3 - } - - if in = in[i:len(in):len(in)]; len(in) > 3 { - words := (*[1]uint32)(unsafe.Pointer(&in[0])) - h ^= uint64(words[0]) * prime64x1 - h = rotl64_23(h)*prime64x2 + prime64x3 - - in = in[4:len(in):len(in)] - } - - for _, b := range in { - h ^= uint64(b) * prime64x5 - h = rotl64_11(h) * prime64x1 - } - - return mix64(h) -} - -func (xx *XXHash64) Write(in []byte) (n int, err error) { - mem, idx := xx.mem[:], int(xx.memIdx) - - xx.ln, n = xx.ln+uint64(len(in)), len(in) - - if idx+len(in) < 32 { - xx.memIdx += int8(copy(mem[idx:len(mem):len(mem)], in)) - return - } - - var ( - v1, v2, v3, v4 = xx.v1, xx.v2, xx.v3, xx.v4 - - i int - ) - - if d := 32 - int(idx); d > 0 && int(idx)+len(in) > 31 { - copy(mem[idx:len(mem):len(mem)], in[:len(in):len(in)]) - - words := (*[4]uint64)(unsafe.Pointer(&mem[0])) - - v1 = round64(v1, words[0]) - v2 = round64(v2, words[1]) - v3 = round64(v3, words[2]) - v4 = round64(v4, words[3]) - - if in, xx.memIdx = in[d:len(in):len(in)], 0; len(in) == 0 { - goto RET - } - } - - for ; i < len(in)-31; i += 32 { - words := (*[4]uint64)(unsafe.Pointer(&in[i])) - - v1 = round64(v1, words[0]) - v2 = round64(v2, words[1]) - v3 = round64(v3, words[2]) - v4 = round64(v4, words[3]) - } - - if len(in)-i != 0 { - xx.memIdx += int8(copy(mem[xx.memIdx:len(mem):len(mem)], in[i:len(in):len(in)])) - } - -RET: - xx.v1, xx.v2, xx.v3, xx.v4 = v1, v2, v3, v4 - - return -} - -func (xx *XXHash64) Sum64() (h uint64) { - if seed := xx.seed; xx.ln > 31 { - v1, v2, v3, v4 := xx.v1, xx.v2, xx.v3, xx.v4 - h = rotl64_1(v1) + rotl64_7(v2) + rotl64_12(v3) + rotl64_18(v4) - - h = mergeRound64(h, v1) - h = mergeRound64(h, v2) - h = mergeRound64(h, v3) - h = mergeRound64(h, v4) - } else if seed == 0 { - h = prime64x5 - } else { - h = seed + prime64x5 - } - - h += uint64(xx.ln) - - if xx.memIdx == 0 { - return mix64(h) - } - - var ( - in = xx.mem[:xx.memIdx:xx.memIdx] - wordsLen = len(in) >> 3 - words = ((*[maxInt32 / 8]uint64)(unsafe.Pointer(&in[0])))[:wordsLen:wordsLen] - ) - - for _, k := range words { - h ^= round64(0, k) - h = rotl64_27(h)*prime64x1 + prime64x4 - } - - if in = in[wordsLen<<3 : len(in) : len(in)]; len(in) > 3 { - words := (*[1]uint32)(unsafe.Pointer(&in[0])) - - h ^= uint64(words[0]) * prime64x1 - h = rotl64_23(h)*prime64x2 + prime64x3 - - in = in[4:len(in):len(in)] - } - - for _, b := range in { - h ^= uint64(b) * prime64x5 - h = rotl64_11(h) * prime64x1 - } - - return mix64(h) -} diff --git a/vendor/github.com/buengese/xxh32/go.mod b/vendor/github.com/buengese/xxh32/go.mod new file mode 100644 index 000000000..4dcc8bc70 --- /dev/null +++ b/vendor/github.com/buengese/xxh32/go.mod @@ -0,0 +1,5 @@ +module github.com/buengese/xxh32 + +go 1.13 + +require github.com/frankban/quicktest v1.7.3 diff --git a/vendor/github.com/buengese/xxh32/go.sum b/vendor/github.com/buengese/xxh32/go.sum new file mode 100644 index 000000000..ba648eb32 --- /dev/null +++ b/vendor/github.com/buengese/xxh32/go.sum @@ -0,0 +1,11 @@ +github.com/frankban/quicktest v1.7.3 h1:kV0lw0TH1j1hozahVmcpFCsbV5hcS4ZalH+U7UoeTow= +github.com/frankban/quicktest v1.7.3/go.mod h1:V1d2J5pfxYH6EjBAgSK7YNXcXlTWxUHdE1sVDXkjnig= +github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/vendor/github.com/buengese/xxh32/xxh32zero.go b/vendor/github.com/buengese/xxh32/xxh32zero.go new file mode 100644 index 000000000..7a76a6bce --- /dev/null +++ b/vendor/github.com/buengese/xxh32/xxh32zero.go @@ -0,0 +1,223 @@ +// Package xxh32 implements the very fast XXH hashing algorithm (32 bits version). +// (https://github.com/Cyan4973/XXH/) +package xxh32 + +import ( + "encoding/binary" +) + +const ( + prime1 uint32 = 2654435761 + prime2 uint32 = 2246822519 + prime3 uint32 = 3266489917 + prime4 uint32 = 668265263 + prime5 uint32 = 374761393 + + primeMask = 0xFFFFFFFF + prime1plus2 = uint32((uint64(prime1) + uint64(prime2)) & primeMask) // 606290984 + prime1minus = uint32((-int64(prime1)) & primeMask) // 1640531535 +) + +// XXHZero represents an xxhash32 object with seed 0. +type XXHZero struct { + v1 uint32 + v2 uint32 + v3 uint32 + v4 uint32 + totalLen uint64 + buf [16]byte + bufused int +} + +// Sum appends the current hash to b and returns the resulting slice. +// It does not change the underlying hash state. +func (xxh XXHZero) Sum(b []byte) []byte { + h32 := xxh.Sum32() + return append(b, byte(h32), byte(h32>>8), byte(h32>>16), byte(h32>>24)) +} + +// Reset resets the Hash to its initial state. +func (xxh *XXHZero) Reset() { + xxh.v1 = prime1plus2 + xxh.v2 = prime2 + xxh.v3 = 0 + xxh.v4 = prime1minus + xxh.totalLen = 0 + xxh.bufused = 0 +} + +// Size returns the number of bytes returned by Sum(). +func (xxh *XXHZero) Size() int { + return 4 +} + +// BlockSize gives the minimum number of bytes accepted by Write(). +func (xxh *XXHZero) BlockSize() int { + return 1 +} + +// Write adds input bytes to the Hash. +// It never returns an error. +func (xxh *XXHZero) Write(input []byte) (int, error) { + if xxh.totalLen == 0 { + xxh.Reset() + } + n := len(input) + m := xxh.bufused + + xxh.totalLen += uint64(n) + + r := len(xxh.buf) - m + if n < r { + copy(xxh.buf[m:], input) + xxh.bufused += len(input) + return n, nil + } + + p := 0 + // Causes compiler to work directly from registers instead of stack: + v1, v2, v3, v4 := xxh.v1, xxh.v2, xxh.v3, xxh.v4 + if m > 0 { + // some data left from previous update + copy(xxh.buf[xxh.bufused:], input[:r]) + xxh.bufused += len(input) - r + + // fast rotl(13) + buf := xxh.buf[:16] // BCE hint. + v1 = rol13(v1+binary.LittleEndian.Uint32(buf[:])*prime2) * prime1 + v2 = rol13(v2+binary.LittleEndian.Uint32(buf[4:])*prime2) * prime1 + v3 = rol13(v3+binary.LittleEndian.Uint32(buf[8:])*prime2) * prime1 + v4 = rol13(v4+binary.LittleEndian.Uint32(buf[12:])*prime2) * prime1 + p = r + xxh.bufused = 0 + } + + for n := n - 16; p <= n; p += 16 { + sub := input[p:][:16] //BCE hint for compiler + v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1 + v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1 + v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1 + v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1 + } + xxh.v1, xxh.v2, xxh.v3, xxh.v4 = v1, v2, v3, v4 + + copy(xxh.buf[xxh.bufused:], input[p:]) + xxh.bufused += len(input) - p + + return n, nil +} + +// Sum32 returns the 32 bits Hash value. +func (xxh *XXHZero) Sum32() uint32 { + h32 := uint32(xxh.totalLen) + if h32 >= 16 { + h32 += rol1(xxh.v1) + rol7(xxh.v2) + rol12(xxh.v3) + rol18(xxh.v4) + } else { + h32 += prime5 + } + + p := 0 + n := xxh.bufused + buf := xxh.buf + for n := n - 4; p <= n; p += 4 { + h32 += binary.LittleEndian.Uint32(buf[p:p+4]) * prime3 + h32 = rol17(h32) * prime4 + } + for ; p < n; p++ { + h32 += uint32(buf[p]) * prime5 + h32 = rol11(h32) * prime1 + } + + h32 ^= h32 >> 15 + h32 *= prime2 + h32 ^= h32 >> 13 + h32 *= prime3 + h32 ^= h32 >> 16 + + return h32 +} + +// ChecksumZero returns the 32bits Hash value. +func ChecksumZero(input []byte) uint32 { + n := len(input) + h32 := uint32(n) + + if n < 16 { + h32 += prime5 + } else { + v1 := prime1plus2 + v2 := prime2 + v3 := uint32(0) + v4 := prime1minus + p := 0 + for n := n - 16; p <= n; p += 16 { + sub := input[p:][:16] //BCE hint for compiler + v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1 + v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1 + v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1 + v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1 + } + input = input[p:] + n -= p + h32 += rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) + } + + p := 0 + for n := n - 4; p <= n; p += 4 { + h32 += binary.LittleEndian.Uint32(input[p:p+4]) * prime3 + h32 = rol17(h32) * prime4 + } + for p < n { + h32 += uint32(input[p]) * prime5 + h32 = rol11(h32) * prime1 + p++ + } + + h32 ^= h32 >> 15 + h32 *= prime2 + h32 ^= h32 >> 13 + h32 *= prime3 + h32 ^= h32 >> 16 + + return h32 +} + +// Uint32Zero hashes x with seed 0. +func Uint32Zero(x uint32) uint32 { + h := prime5 + 4 + x*prime3 + h = rol17(h) * prime4 + h ^= h >> 15 + h *= prime2 + h ^= h >> 13 + h *= prime3 + h ^= h >> 16 + return h +} + +func rol1(u uint32) uint32 { + return u<<1 | u>>31 +} + +func rol7(u uint32) uint32 { + return u<<7 | u>>25 +} + +func rol11(u uint32) uint32 { + return u<<11 | u>>21 +} + +func rol12(u uint32) uint32 { + return u<<12 | u>>20 +} + +func rol13(u uint32) uint32 { + return u<<13 | u>>19 +} + +func rol17(u uint32) uint32 { + return u<<17 | u>>15 +} + +func rol18(u uint32) uint32 { + return u<<18 | u>>14 +} diff --git a/vendor/github.com/golang/snappy/.gitignore b/vendor/github.com/golang/snappy/.gitignore deleted file mode 100644 index 042091d9b..000000000 --- a/vendor/github.com/golang/snappy/.gitignore +++ /dev/null @@ -1,16 +0,0 @@ -cmd/snappytool/snappytool -testdata/bench - -# These explicitly listed benchmark data files are for an obsolete version of -# snappy_test.go. -testdata/alice29.txt -testdata/asyoulik.txt -testdata/fireworks.jpeg -testdata/geo.protodata -testdata/html -testdata/html_x_4 -testdata/kppkn.gtb -testdata/lcet10.txt -testdata/paper-100k.pdf -testdata/plrabn12.txt -testdata/urls.10K diff --git a/vendor/github.com/golang/snappy/AUTHORS b/vendor/github.com/golang/snappy/AUTHORS deleted file mode 100644 index bcfa19520..000000000 --- a/vendor/github.com/golang/snappy/AUTHORS +++ /dev/null @@ -1,15 +0,0 @@ -# This is the official list of Snappy-Go authors for copyright purposes. -# This file is distinct from the CONTRIBUTORS files. -# See the latter for an explanation. - -# Names should be added to this file as -# Name or Organization -# The email address is not required for organizations. - -# Please keep the list sorted. - -Damian Gryski -Google Inc. -Jan Mercl <0xjnml@gmail.com> -Rodolfo Carvalho -Sebastien Binet diff --git a/vendor/github.com/golang/snappy/CONTRIBUTORS b/vendor/github.com/golang/snappy/CONTRIBUTORS deleted file mode 100644 index 931ae3160..000000000 --- a/vendor/github.com/golang/snappy/CONTRIBUTORS +++ /dev/null @@ -1,37 +0,0 @@ -# This is the official list of people who can contribute -# (and typically have contributed) code to the Snappy-Go repository. -# The AUTHORS file lists the copyright holders; this file -# lists people. For example, Google employees are listed here -# but not in AUTHORS, because Google holds the copyright. -# -# The submission process automatically checks to make sure -# that people submitting code are listed in this file (by email address). -# -# Names should be added to this file only after verifying that -# the individual or the individual's organization has agreed to -# the appropriate Contributor License Agreement, found here: -# -# http://code.google.com/legal/individual-cla-v1.0.html -# http://code.google.com/legal/corporate-cla-v1.0.html -# -# The agreement for individuals can be filled out on the web. -# -# When adding J Random Contributor's name to this file, -# either J's name or J's organization's name should be -# added to the AUTHORS file, depending on whether the -# individual or corporate CLA was used. - -# Names should be added to this file like so: -# Name - -# Please keep the list sorted. - -Damian Gryski -Jan Mercl <0xjnml@gmail.com> -Kai Backman -Marc-Antoine Ruel -Nigel Tao -Rob Pike -Rodolfo Carvalho -Russ Cox -Sebastien Binet diff --git a/vendor/github.com/golang/snappy/README b/vendor/github.com/golang/snappy/README deleted file mode 100644 index cea12879a..000000000 --- a/vendor/github.com/golang/snappy/README +++ /dev/null @@ -1,107 +0,0 @@ -The Snappy compression format in the Go programming language. - -To download and install from source: -$ go get github.com/golang/snappy - -Unless otherwise noted, the Snappy-Go source files are distributed -under the BSD-style license found in the LICENSE file. - - - -Benchmarks. - -The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten -or so files, the same set used by the C++ Snappy code (github.com/google/snappy -and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @ -3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29: - -"go test -test.bench=." - -_UFlat0-8 2.19GB/s ± 0% html -_UFlat1-8 1.41GB/s ± 0% urls -_UFlat2-8 23.5GB/s ± 2% jpg -_UFlat3-8 1.91GB/s ± 0% jpg_200 -_UFlat4-8 14.0GB/s ± 1% pdf -_UFlat5-8 1.97GB/s ± 0% html4 -_UFlat6-8 814MB/s ± 0% txt1 -_UFlat7-8 785MB/s ± 0% txt2 -_UFlat8-8 857MB/s ± 0% txt3 -_UFlat9-8 719MB/s ± 1% txt4 -_UFlat10-8 2.84GB/s ± 0% pb -_UFlat11-8 1.05GB/s ± 0% gaviota - -_ZFlat0-8 1.04GB/s ± 0% html -_ZFlat1-8 534MB/s ± 0% urls -_ZFlat2-8 15.7GB/s ± 1% jpg -_ZFlat3-8 740MB/s ± 3% jpg_200 -_ZFlat4-8 9.20GB/s ± 1% pdf -_ZFlat5-8 991MB/s ± 0% html4 -_ZFlat6-8 379MB/s ± 0% txt1 -_ZFlat7-8 352MB/s ± 0% txt2 -_ZFlat8-8 396MB/s ± 1% txt3 -_ZFlat9-8 327MB/s ± 1% txt4 -_ZFlat10-8 1.33GB/s ± 1% pb -_ZFlat11-8 605MB/s ± 1% gaviota - - - -"go test -test.bench=. -tags=noasm" - -_UFlat0-8 621MB/s ± 2% html -_UFlat1-8 494MB/s ± 1% urls -_UFlat2-8 23.2GB/s ± 1% jpg -_UFlat3-8 1.12GB/s ± 1% jpg_200 -_UFlat4-8 4.35GB/s ± 1% pdf -_UFlat5-8 609MB/s ± 0% html4 -_UFlat6-8 296MB/s ± 0% txt1 -_UFlat7-8 288MB/s ± 0% txt2 -_UFlat8-8 309MB/s ± 1% txt3 -_UFlat9-8 280MB/s ± 1% txt4 -_UFlat10-8 753MB/s ± 0% pb -_UFlat11-8 400MB/s ± 0% gaviota - -_ZFlat0-8 409MB/s ± 1% html -_ZFlat1-8 250MB/s ± 1% urls -_ZFlat2-8 12.3GB/s ± 1% jpg -_ZFlat3-8 132MB/s ± 0% jpg_200 -_ZFlat4-8 2.92GB/s ± 0% pdf -_ZFlat5-8 405MB/s ± 1% html4 -_ZFlat6-8 179MB/s ± 1% txt1 -_ZFlat7-8 170MB/s ± 1% txt2 -_ZFlat8-8 189MB/s ± 1% txt3 -_ZFlat9-8 164MB/s ± 1% txt4 -_ZFlat10-8 479MB/s ± 1% pb -_ZFlat11-8 270MB/s ± 1% gaviota - - - -For comparison (Go's encoded output is byte-for-byte identical to C++'s), here -are the numbers from C++ Snappy's - -make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log - -BM_UFlat/0 2.4GB/s html -BM_UFlat/1 1.4GB/s urls -BM_UFlat/2 21.8GB/s jpg -BM_UFlat/3 1.5GB/s jpg_200 -BM_UFlat/4 13.3GB/s pdf -BM_UFlat/5 2.1GB/s html4 -BM_UFlat/6 1.0GB/s txt1 -BM_UFlat/7 959.4MB/s txt2 -BM_UFlat/8 1.0GB/s txt3 -BM_UFlat/9 864.5MB/s txt4 -BM_UFlat/10 2.9GB/s pb -BM_UFlat/11 1.2GB/s gaviota - -BM_ZFlat/0 944.3MB/s html (22.31 %) -BM_ZFlat/1 501.6MB/s urls (47.78 %) -BM_ZFlat/2 14.3GB/s jpg (99.95 %) -BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %) -BM_ZFlat/4 8.3GB/s pdf (83.30 %) -BM_ZFlat/5 903.5MB/s html4 (22.52 %) -BM_ZFlat/6 336.0MB/s txt1 (57.88 %) -BM_ZFlat/7 312.3MB/s txt2 (61.91 %) -BM_ZFlat/8 353.1MB/s txt3 (54.99 %) -BM_ZFlat/9 289.9MB/s txt4 (66.26 %) -BM_ZFlat/10 1.2GB/s pb (19.68 %) -BM_ZFlat/11 527.4MB/s gaviota (37.72 %) diff --git a/vendor/github.com/golang/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go deleted file mode 100644 index 72efb0353..000000000 --- a/vendor/github.com/golang/snappy/decode.go +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2011 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package snappy - -import ( - "encoding/binary" - "errors" - "io" -) - -var ( - // ErrCorrupt reports that the input is invalid. - ErrCorrupt = errors.New("snappy: corrupt input") - // ErrTooLarge reports that the uncompressed length is too large. - ErrTooLarge = errors.New("snappy: decoded block is too large") - // ErrUnsupported reports that the input isn't supported. - ErrUnsupported = errors.New("snappy: unsupported input") - - errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") -) - -// DecodedLen returns the length of the decoded block. -func DecodedLen(src []byte) (int, error) { - v, _, err := decodedLen(src) - return v, err -} - -// decodedLen returns the length of the decoded block and the number of bytes -// that the length header occupied. -func decodedLen(src []byte) (blockLen, headerLen int, err error) { - v, n := binary.Uvarint(src) - if n <= 0 || v > 0xffffffff { - return 0, 0, ErrCorrupt - } - - const wordSize = 32 << (^uint(0) >> 32 & 1) - if wordSize == 32 && v > 0x7fffffff { - return 0, 0, ErrTooLarge - } - return int(v), n, nil -} - -const ( - decodeErrCodeCorrupt = 1 - decodeErrCodeUnsupportedLiteralLength = 2 -) - -// Decode returns the decoded form of src. The returned slice may be a sub- -// slice of dst if dst was large enough to hold the entire decoded block. -// Otherwise, a newly allocated slice will be returned. -// -// The dst and src must not overlap. It is valid to pass a nil dst. -func Decode(dst, src []byte) ([]byte, error) { - dLen, s, err := decodedLen(src) - if err != nil { - return nil, err - } - if dLen <= len(dst) { - dst = dst[:dLen] - } else { - dst = make([]byte, dLen) - } - switch decode(dst, src[s:]) { - case 0: - return dst, nil - case decodeErrCodeUnsupportedLiteralLength: - return nil, errUnsupportedLiteralLength - } - return nil, ErrCorrupt -} - -// NewReader returns a new Reader that decompresses from r, using the framing -// format described at -// https://github.com/google/snappy/blob/master/framing_format.txt -func NewReader(r io.Reader) *Reader { - return &Reader{ - r: r, - decoded: make([]byte, maxBlockSize), - buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize), - } -} - -// Reader is an io.Reader that can read Snappy-compressed bytes. -type Reader struct { - r io.Reader - err error - decoded []byte - buf []byte - // decoded[i:j] contains decoded bytes that have not yet been passed on. - i, j int - readHeader bool -} - -// Reset discards any buffered data, resets all state, and switches the Snappy -// reader to read from r. This permits reusing a Reader rather than allocating -// a new one. -func (r *Reader) Reset(reader io.Reader) { - r.r = reader - r.err = nil - r.i = 0 - r.j = 0 - r.readHeader = false -} - -func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { - if _, r.err = io.ReadFull(r.r, p); r.err != nil { - if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { - r.err = ErrCorrupt - } - return false - } - return true -} - -// Read satisfies the io.Reader interface. -func (r *Reader) Read(p []byte) (int, error) { - if r.err != nil { - return 0, r.err - } - for { - if r.i < r.j { - n := copy(p, r.decoded[r.i:r.j]) - r.i += n - return n, nil - } - if !r.readFull(r.buf[:4], true) { - return 0, r.err - } - chunkType := r.buf[0] - if !r.readHeader { - if chunkType != chunkTypeStreamIdentifier { - r.err = ErrCorrupt - return 0, r.err - } - r.readHeader = true - } - chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 - if chunkLen > len(r.buf) { - r.err = ErrUnsupported - return 0, r.err - } - - // The chunk types are specified at - // https://github.com/google/snappy/blob/master/framing_format.txt - switch chunkType { - case chunkTypeCompressedData: - // Section 4.2. Compressed data (chunk type 0x00). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return 0, r.err - } - buf := r.buf[:chunkLen] - if !r.readFull(buf, false) { - return 0, r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - buf = buf[checksumSize:] - - n, err := DecodedLen(buf) - if err != nil { - r.err = err - return 0, r.err - } - if n > len(r.decoded) { - r.err = ErrCorrupt - return 0, r.err - } - if _, err := Decode(r.decoded, buf); err != nil { - r.err = err - return 0, r.err - } - if crc(r.decoded[:n]) != checksum { - r.err = ErrCorrupt - return 0, r.err - } - r.i, r.j = 0, n - continue - - case chunkTypeUncompressedData: - // Section 4.3. Uncompressed data (chunk type 0x01). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return 0, r.err - } - buf := r.buf[:checksumSize] - if !r.readFull(buf, false) { - return 0, r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - // Read directly into r.decoded instead of via r.buf. - n := chunkLen - checksumSize - if n > len(r.decoded) { - r.err = ErrCorrupt - return 0, r.err - } - if !r.readFull(r.decoded[:n], false) { - return 0, r.err - } - if crc(r.decoded[:n]) != checksum { - r.err = ErrCorrupt - return 0, r.err - } - r.i, r.j = 0, n - continue - - case chunkTypeStreamIdentifier: - // Section 4.1. Stream identifier (chunk type 0xff). - if chunkLen != len(magicBody) { - r.err = ErrCorrupt - return 0, r.err - } - if !r.readFull(r.buf[:len(magicBody)], false) { - return 0, r.err - } - for i := 0; i < len(magicBody); i++ { - if r.buf[i] != magicBody[i] { - r.err = ErrCorrupt - return 0, r.err - } - } - continue - } - - if chunkType <= 0x7f { - // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). - r.err = ErrUnsupported - return 0, r.err - } - // Section 4.4 Padding (chunk type 0xfe). - // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). - if !r.readFull(r.buf[:chunkLen], false) { - return 0, r.err - } - } -} diff --git a/vendor/github.com/golang/snappy/decode_amd64.go b/vendor/github.com/golang/snappy/decode_amd64.go deleted file mode 100644 index fcd192b84..000000000 --- a/vendor/github.com/golang/snappy/decode_amd64.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -package snappy - -// decode has the same semantics as in decode_other.go. -// -//go:noescape -func decode(dst, src []byte) int diff --git a/vendor/github.com/golang/snappy/decode_amd64.s b/vendor/github.com/golang/snappy/decode_amd64.s deleted file mode 100644 index e6179f65e..000000000 --- a/vendor/github.com/golang/snappy/decode_amd64.s +++ /dev/null @@ -1,490 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" - -// The asm code generally follows the pure Go code in decode_other.go, except -// where marked with a "!!!". - -// func decode(dst, src []byte) int -// -// All local variables fit into registers. The non-zero stack size is only to -// spill registers and push args when issuing a CALL. The register allocation: -// - AX scratch -// - BX scratch -// - CX length or x -// - DX offset -// - SI &src[s] -// - DI &dst[d] -// + R8 dst_base -// + R9 dst_len -// + R10 dst_base + dst_len -// + R11 src_base -// + R12 src_len -// + R13 src_base + src_len -// - R14 used by doCopy -// - R15 used by doCopy -// -// The registers R8-R13 (marked with a "+") are set at the start of the -// function, and after a CALL returns, and are not otherwise modified. -// -// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI. -// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI. -TEXT ·decode(SB), NOSPLIT, $48-56 - // Initialize SI, DI and R8-R13. - MOVQ dst_base+0(FP), R8 - MOVQ dst_len+8(FP), R9 - MOVQ R8, DI - MOVQ R8, R10 - ADDQ R9, R10 - MOVQ src_base+24(FP), R11 - MOVQ src_len+32(FP), R12 - MOVQ R11, SI - MOVQ R11, R13 - ADDQ R12, R13 - -loop: - // for s < len(src) - CMPQ SI, R13 - JEQ end - - // CX = uint32(src[s]) - // - // switch src[s] & 0x03 - MOVBLZX (SI), CX - MOVL CX, BX - ANDL $3, BX - CMPL BX, $1 - JAE tagCopy - - // ---------------------------------------- - // The code below handles literal tags. - - // case tagLiteral: - // x := uint32(src[s] >> 2) - // switch - SHRL $2, CX - CMPL CX, $60 - JAE tagLit60Plus - - // case x < 60: - // s++ - INCQ SI - -doLit: - // This is the end of the inner "switch", when we have a literal tag. - // - // We assume that CX == x and x fits in a uint32, where x is the variable - // used in the pure Go decode_other.go code. - - // length = int(x) + 1 - // - // Unlike the pure Go code, we don't need to check if length <= 0 because - // CX can hold 64 bits, so the increment cannot overflow. - INCQ CX - - // Prepare to check if copying length bytes will run past the end of dst or - // src. - // - // AX = len(dst) - d - // BX = len(src) - s - MOVQ R10, AX - SUBQ DI, AX - MOVQ R13, BX - SUBQ SI, BX - - // !!! Try a faster technique for short (16 or fewer bytes) copies. - // - // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { - // goto callMemmove // Fall back on calling runtime·memmove. - // } - // - // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s - // against 21 instead of 16, because it cannot assume that all of its input - // is contiguous in memory and so it needs to leave enough source bytes to - // read the next tag without refilling buffers, but Go's Decode assumes - // contiguousness (the src argument is a []byte). - CMPQ CX, $16 - JGT callMemmove - CMPQ AX, $16 - JLT callMemmove - CMPQ BX, $16 - JLT callMemmove - - // !!! Implement the copy from src to dst as a 16-byte load and store. - // (Decode's documentation says that dst and src must not overlap.) - // - // This always copies 16 bytes, instead of only length bytes, but that's - // OK. If the input is a valid Snappy encoding then subsequent iterations - // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a - // non-nil error), so the overrun will be ignored. - // - // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or - // 16-byte loads and stores. This technique probably wouldn't be as - // effective on architectures that are fussier about alignment. - MOVOU 0(SI), X0 - MOVOU X0, 0(DI) - - // d += length - // s += length - ADDQ CX, DI - ADDQ CX, SI - JMP loop - -callMemmove: - // if length > len(dst)-d || length > len(src)-s { etc } - CMPQ CX, AX - JGT errCorrupt - CMPQ CX, BX - JGT errCorrupt - - // copy(dst[d:], src[s:s+length]) - // - // This means calling runtime·memmove(&dst[d], &src[s], length), so we push - // DI, SI and CX as arguments. Coincidentally, we also need to spill those - // three registers to the stack, to save local variables across the CALL. - MOVQ DI, 0(SP) - MOVQ SI, 8(SP) - MOVQ CX, 16(SP) - MOVQ DI, 24(SP) - MOVQ SI, 32(SP) - MOVQ CX, 40(SP) - CALL runtime·memmove(SB) - - // Restore local variables: unspill registers from the stack and - // re-calculate R8-R13. - MOVQ 24(SP), DI - MOVQ 32(SP), SI - MOVQ 40(SP), CX - MOVQ dst_base+0(FP), R8 - MOVQ dst_len+8(FP), R9 - MOVQ R8, R10 - ADDQ R9, R10 - MOVQ src_base+24(FP), R11 - MOVQ src_len+32(FP), R12 - MOVQ R11, R13 - ADDQ R12, R13 - - // d += length - // s += length - ADDQ CX, DI - ADDQ CX, SI - JMP loop - -tagLit60Plus: - // !!! This fragment does the - // - // s += x - 58; if uint(s) > uint(len(src)) { etc } - // - // checks. In the asm version, we code it once instead of once per switch case. - ADDQ CX, SI - SUBQ $58, SI - MOVQ SI, BX - SUBQ R11, BX - CMPQ BX, R12 - JA errCorrupt - - // case x == 60: - CMPL CX, $61 - JEQ tagLit61 - JA tagLit62Plus - - // x = uint32(src[s-1]) - MOVBLZX -1(SI), CX - JMP doLit - -tagLit61: - // case x == 61: - // x = uint32(src[s-2]) | uint32(src[s-1])<<8 - MOVWLZX -2(SI), CX - JMP doLit - -tagLit62Plus: - CMPL CX, $62 - JA tagLit63 - - // case x == 62: - // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 - MOVWLZX -3(SI), CX - MOVBLZX -1(SI), BX - SHLL $16, BX - ORL BX, CX - JMP doLit - -tagLit63: - // case x == 63: - // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 - MOVL -4(SI), CX - JMP doLit - -// The code above handles literal tags. -// ---------------------------------------- -// The code below handles copy tags. - -tagCopy4: - // case tagCopy4: - // s += 5 - ADDQ $5, SI - - // if uint(s) > uint(len(src)) { etc } - MOVQ SI, BX - SUBQ R11, BX - CMPQ BX, R12 - JA errCorrupt - - // length = 1 + int(src[s-5])>>2 - SHRQ $2, CX - INCQ CX - - // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) - MOVLQZX -4(SI), DX - JMP doCopy - -tagCopy2: - // case tagCopy2: - // s += 3 - ADDQ $3, SI - - // if uint(s) > uint(len(src)) { etc } - MOVQ SI, BX - SUBQ R11, BX - CMPQ BX, R12 - JA errCorrupt - - // length = 1 + int(src[s-3])>>2 - SHRQ $2, CX - INCQ CX - - // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) - MOVWQZX -2(SI), DX - JMP doCopy - -tagCopy: - // We have a copy tag. We assume that: - // - BX == src[s] & 0x03 - // - CX == src[s] - CMPQ BX, $2 - JEQ tagCopy2 - JA tagCopy4 - - // case tagCopy1: - // s += 2 - ADDQ $2, SI - - // if uint(s) > uint(len(src)) { etc } - MOVQ SI, BX - SUBQ R11, BX - CMPQ BX, R12 - JA errCorrupt - - // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) - MOVQ CX, DX - ANDQ $0xe0, DX - SHLQ $3, DX - MOVBQZX -1(SI), BX - ORQ BX, DX - - // length = 4 + int(src[s-2])>>2&0x7 - SHRQ $2, CX - ANDQ $7, CX - ADDQ $4, CX - -doCopy: - // This is the end of the outer "switch", when we have a copy tag. - // - // We assume that: - // - CX == length && CX > 0 - // - DX == offset - - // if offset <= 0 { etc } - CMPQ DX, $0 - JLE errCorrupt - - // if d < offset { etc } - MOVQ DI, BX - SUBQ R8, BX - CMPQ BX, DX - JLT errCorrupt - - // if length > len(dst)-d { etc } - MOVQ R10, BX - SUBQ DI, BX - CMPQ CX, BX - JGT errCorrupt - - // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length - // - // Set: - // - R14 = len(dst)-d - // - R15 = &dst[d-offset] - MOVQ R10, R14 - SUBQ DI, R14 - MOVQ DI, R15 - SUBQ DX, R15 - - // !!! Try a faster technique for short (16 or fewer bytes) forward copies. - // - // First, try using two 8-byte load/stores, similar to the doLit technique - // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is - // still OK if offset >= 8. Note that this has to be two 8-byte load/stores - // and not one 16-byte load/store, and the first store has to be before the - // second load, due to the overlap if offset is in the range [8, 16). - // - // if length > 16 || offset < 8 || len(dst)-d < 16 { - // goto slowForwardCopy - // } - // copy 16 bytes - // d += length - CMPQ CX, $16 - JGT slowForwardCopy - CMPQ DX, $8 - JLT slowForwardCopy - CMPQ R14, $16 - JLT slowForwardCopy - MOVQ 0(R15), AX - MOVQ AX, 0(DI) - MOVQ 8(R15), BX - MOVQ BX, 8(DI) - ADDQ CX, DI - JMP loop - -slowForwardCopy: - // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we - // can still try 8-byte load stores, provided we can overrun up to 10 extra - // bytes. As above, the overrun will be fixed up by subsequent iterations - // of the outermost loop. - // - // The C++ snappy code calls this technique IncrementalCopyFastPath. Its - // commentary says: - // - // ---- - // - // The main part of this loop is a simple copy of eight bytes at a time - // until we've copied (at least) the requested amount of bytes. However, - // if d and d-offset are less than eight bytes apart (indicating a - // repeating pattern of length < 8), we first need to expand the pattern in - // order to get the correct results. For instance, if the buffer looks like - // this, with the eight-byte and patterns marked as - // intervals: - // - // abxxxxxxxxxxxx - // [------] d-offset - // [------] d - // - // a single eight-byte copy from to will repeat the pattern - // once, after which we can move two bytes without moving : - // - // ababxxxxxxxxxx - // [------] d-offset - // [------] d - // - // and repeat the exercise until the two no longer overlap. - // - // This allows us to do very well in the special case of one single byte - // repeated many times, without taking a big hit for more general cases. - // - // The worst case of extra writing past the end of the match occurs when - // offset == 1 and length == 1; the last copy will read from byte positions - // [0..7] and write to [4..11], whereas it was only supposed to write to - // position 1. Thus, ten excess bytes. - // - // ---- - // - // That "10 byte overrun" worst case is confirmed by Go's - // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy - // and finishSlowForwardCopy algorithm. - // - // if length > len(dst)-d-10 { - // goto verySlowForwardCopy - // } - SUBQ $10, R14 - CMPQ CX, R14 - JGT verySlowForwardCopy - -makeOffsetAtLeast8: - // !!! As above, expand the pattern so that offset >= 8 and we can use - // 8-byte load/stores. - // - // for offset < 8 { - // copy 8 bytes from dst[d-offset:] to dst[d:] - // length -= offset - // d += offset - // offset += offset - // // The two previous lines together means that d-offset, and therefore - // // R15, is unchanged. - // } - CMPQ DX, $8 - JGE fixUpSlowForwardCopy - MOVQ (R15), BX - MOVQ BX, (DI) - SUBQ DX, CX - ADDQ DX, DI - ADDQ DX, DX - JMP makeOffsetAtLeast8 - -fixUpSlowForwardCopy: - // !!! Add length (which might be negative now) to d (implied by DI being - // &dst[d]) so that d ends up at the right place when we jump back to the - // top of the loop. Before we do that, though, we save DI to AX so that, if - // length is positive, copying the remaining length bytes will write to the - // right place. - MOVQ DI, AX - ADDQ CX, DI - -finishSlowForwardCopy: - // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative - // length means that we overrun, but as above, that will be fixed up by - // subsequent iterations of the outermost loop. - CMPQ CX, $0 - JLE loop - MOVQ (R15), BX - MOVQ BX, (AX) - ADDQ $8, R15 - ADDQ $8, AX - SUBQ $8, CX - JMP finishSlowForwardCopy - -verySlowForwardCopy: - // verySlowForwardCopy is a simple implementation of forward copy. In C - // parlance, this is a do/while loop instead of a while loop, since we know - // that length > 0. In Go syntax: - // - // for { - // dst[d] = dst[d - offset] - // d++ - // length-- - // if length == 0 { - // break - // } - // } - MOVB (R15), BX - MOVB BX, (DI) - INCQ R15 - INCQ DI - DECQ CX - JNZ verySlowForwardCopy - JMP loop - -// The code above handles copy tags. -// ---------------------------------------- - -end: - // This is the end of the "for s < len(src)". - // - // if d != len(dst) { etc } - CMPQ DI, R10 - JNE errCorrupt - - // return 0 - MOVQ $0, ret+48(FP) - RET - -errCorrupt: - // return decodeErrCodeCorrupt - MOVQ $1, ret+48(FP) - RET diff --git a/vendor/github.com/golang/snappy/decode_other.go b/vendor/github.com/golang/snappy/decode_other.go deleted file mode 100644 index 8c9f2049b..000000000 --- a/vendor/github.com/golang/snappy/decode_other.go +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !amd64 appengine !gc noasm - -package snappy - -// decode writes the decoding of src to dst. It assumes that the varint-encoded -// length of the decompressed bytes has already been read, and that len(dst) -// equals that length. -// -// It returns 0 on success or a decodeErrCodeXxx error code on failure. -func decode(dst, src []byte) int { - var d, s, offset, length int - for s < len(src) { - switch src[s] & 0x03 { - case tagLiteral: - x := uint32(src[s] >> 2) - switch { - case x < 60: - s++ - case x == 60: - s += 2 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-1]) - case x == 61: - s += 3 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-2]) | uint32(src[s-1])<<8 - case x == 62: - s += 4 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 - case x == 63: - s += 5 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 - } - length = int(x) + 1 - if length <= 0 { - return decodeErrCodeUnsupportedLiteralLength - } - if length > len(dst)-d || length > len(src)-s { - return decodeErrCodeCorrupt - } - copy(dst[d:], src[s:s+length]) - d += length - s += length - continue - - case tagCopy1: - s += 2 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - length = 4 + int(src[s-2])>>2&0x7 - offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) - - case tagCopy2: - s += 3 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - length = 1 + int(src[s-3])>>2 - offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) - - case tagCopy4: - s += 5 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - length = 1 + int(src[s-5])>>2 - offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) - } - - if offset <= 0 || d < offset || length > len(dst)-d { - return decodeErrCodeCorrupt - } - // Copy from an earlier sub-slice of dst to a later sub-slice. Unlike - // the built-in copy function, this byte-by-byte copy always runs - // forwards, even if the slices overlap. Conceptually, this is: - // - // d += forwardCopy(dst[d:d+length], dst[d-offset:]) - for end := d + length; d != end; d++ { - dst[d] = dst[d-offset] - } - } - if d != len(dst) { - return decodeErrCodeCorrupt - } - return 0 -} diff --git a/vendor/github.com/golang/snappy/encode.go b/vendor/github.com/golang/snappy/encode.go deleted file mode 100644 index 8d393e904..000000000 --- a/vendor/github.com/golang/snappy/encode.go +++ /dev/null @@ -1,285 +0,0 @@ -// Copyright 2011 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package snappy - -import ( - "encoding/binary" - "errors" - "io" -) - -// Encode returns the encoded form of src. The returned slice may be a sub- -// slice of dst if dst was large enough to hold the entire encoded block. -// Otherwise, a newly allocated slice will be returned. -// -// The dst and src must not overlap. It is valid to pass a nil dst. -func Encode(dst, src []byte) []byte { - if n := MaxEncodedLen(len(src)); n < 0 { - panic(ErrTooLarge) - } else if len(dst) < n { - dst = make([]byte, n) - } - - // The block starts with the varint-encoded length of the decompressed bytes. - d := binary.PutUvarint(dst, uint64(len(src))) - - for len(src) > 0 { - p := src - src = nil - if len(p) > maxBlockSize { - p, src = p[:maxBlockSize], p[maxBlockSize:] - } - if len(p) < minNonLiteralBlockSize { - d += emitLiteral(dst[d:], p) - } else { - d += encodeBlock(dst[d:], p) - } - } - return dst[:d] -} - -// inputMargin is the minimum number of extra input bytes to keep, inside -// encodeBlock's inner loop. On some architectures, this margin lets us -// implement a fast path for emitLiteral, where the copy of short (<= 16 byte) -// literals can be implemented as a single load to and store from a 16-byte -// register. That literal's actual length can be as short as 1 byte, so this -// can copy up to 15 bytes too much, but that's OK as subsequent iterations of -// the encoding loop will fix up the copy overrun, and this inputMargin ensures -// that we don't overrun the dst and src buffers. -const inputMargin = 16 - 1 - -// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that -// could be encoded with a copy tag. This is the minimum with respect to the -// algorithm used by encodeBlock, not a minimum enforced by the file format. -// -// The encoded output must start with at least a 1 byte literal, as there are -// no previous bytes to copy. A minimal (1 byte) copy after that, generated -// from an emitCopy call in encodeBlock's main loop, would require at least -// another inputMargin bytes, for the reason above: we want any emitLiteral -// calls inside encodeBlock's main loop to use the fast path if possible, which -// requires being able to overrun by inputMargin bytes. Thus, -// minNonLiteralBlockSize equals 1 + 1 + inputMargin. -// -// The C++ code doesn't use this exact threshold, but it could, as discussed at -// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion -// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an -// optimization. It should not affect the encoded form. This is tested by -// TestSameEncodingAsCppShortCopies. -const minNonLiteralBlockSize = 1 + 1 + inputMargin - -// MaxEncodedLen returns the maximum length of a snappy block, given its -// uncompressed length. -// -// It will return a negative value if srcLen is too large to encode. -func MaxEncodedLen(srcLen int) int { - n := uint64(srcLen) - if n > 0xffffffff { - return -1 - } - // Compressed data can be defined as: - // compressed := item* literal* - // item := literal* copy - // - // The trailing literal sequence has a space blowup of at most 62/60 - // since a literal of length 60 needs one tag byte + one extra byte - // for length information. - // - // Item blowup is trickier to measure. Suppose the "copy" op copies - // 4 bytes of data. Because of a special check in the encoding code, - // we produce a 4-byte copy only if the offset is < 65536. Therefore - // the copy op takes 3 bytes to encode, and this type of item leads - // to at most the 62/60 blowup for representing literals. - // - // Suppose the "copy" op copies 5 bytes of data. If the offset is big - // enough, it will take 5 bytes to encode the copy op. Therefore the - // worst case here is a one-byte literal followed by a five-byte copy. - // That is, 6 bytes of input turn into 7 bytes of "compressed" data. - // - // This last factor dominates the blowup, so the final estimate is: - n = 32 + n + n/6 - if n > 0xffffffff { - return -1 - } - return int(n) -} - -var errClosed = errors.New("snappy: Writer is closed") - -// NewWriter returns a new Writer that compresses to w. -// -// The Writer returned does not buffer writes. There is no need to Flush or -// Close such a Writer. -// -// Deprecated: the Writer returned is not suitable for many small writes, only -// for few large writes. Use NewBufferedWriter instead, which is efficient -// regardless of the frequency and shape of the writes, and remember to Close -// that Writer when done. -func NewWriter(w io.Writer) *Writer { - return &Writer{ - w: w, - obuf: make([]byte, obufLen), - } -} - -// NewBufferedWriter returns a new Writer that compresses to w, using the -// framing format described at -// https://github.com/google/snappy/blob/master/framing_format.txt -// -// The Writer returned buffers writes. Users must call Close to guarantee all -// data has been forwarded to the underlying io.Writer. They may also call -// Flush zero or more times before calling Close. -func NewBufferedWriter(w io.Writer) *Writer { - return &Writer{ - w: w, - ibuf: make([]byte, 0, maxBlockSize), - obuf: make([]byte, obufLen), - } -} - -// Writer is an io.Writer that can write Snappy-compressed bytes. -type Writer struct { - w io.Writer - err error - - // ibuf is a buffer for the incoming (uncompressed) bytes. - // - // Its use is optional. For backwards compatibility, Writers created by the - // NewWriter function have ibuf == nil, do not buffer incoming bytes, and - // therefore do not need to be Flush'ed or Close'd. - ibuf []byte - - // obuf is a buffer for the outgoing (compressed) bytes. - obuf []byte - - // wroteStreamHeader is whether we have written the stream header. - wroteStreamHeader bool -} - -// Reset discards the writer's state and switches the Snappy writer to write to -// w. This permits reusing a Writer rather than allocating a new one. -func (w *Writer) Reset(writer io.Writer) { - w.w = writer - w.err = nil - if w.ibuf != nil { - w.ibuf = w.ibuf[:0] - } - w.wroteStreamHeader = false -} - -// Write satisfies the io.Writer interface. -func (w *Writer) Write(p []byte) (nRet int, errRet error) { - if w.ibuf == nil { - // Do not buffer incoming bytes. This does not perform or compress well - // if the caller of Writer.Write writes many small slices. This - // behavior is therefore deprecated, but still supported for backwards - // compatibility with code that doesn't explicitly Flush or Close. - return w.write(p) - } - - // The remainder of this method is based on bufio.Writer.Write from the - // standard library. - - for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil { - var n int - if len(w.ibuf) == 0 { - // Large write, empty buffer. - // Write directly from p to avoid copy. - n, _ = w.write(p) - } else { - n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) - w.ibuf = w.ibuf[:len(w.ibuf)+n] - w.Flush() - } - nRet += n - p = p[n:] - } - if w.err != nil { - return nRet, w.err - } - n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) - w.ibuf = w.ibuf[:len(w.ibuf)+n] - nRet += n - return nRet, nil -} - -func (w *Writer) write(p []byte) (nRet int, errRet error) { - if w.err != nil { - return 0, w.err - } - for len(p) > 0 { - obufStart := len(magicChunk) - if !w.wroteStreamHeader { - w.wroteStreamHeader = true - copy(w.obuf, magicChunk) - obufStart = 0 - } - - var uncompressed []byte - if len(p) > maxBlockSize { - uncompressed, p = p[:maxBlockSize], p[maxBlockSize:] - } else { - uncompressed, p = p, nil - } - checksum := crc(uncompressed) - - // Compress the buffer, discarding the result if the improvement - // isn't at least 12.5%. - compressed := Encode(w.obuf[obufHeaderLen:], uncompressed) - chunkType := uint8(chunkTypeCompressedData) - chunkLen := 4 + len(compressed) - obufEnd := obufHeaderLen + len(compressed) - if len(compressed) >= len(uncompressed)-len(uncompressed)/8 { - chunkType = chunkTypeUncompressedData - chunkLen = 4 + len(uncompressed) - obufEnd = obufHeaderLen - } - - // Fill in the per-chunk header that comes before the body. - w.obuf[len(magicChunk)+0] = chunkType - w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0) - w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8) - w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16) - w.obuf[len(magicChunk)+4] = uint8(checksum >> 0) - w.obuf[len(magicChunk)+5] = uint8(checksum >> 8) - w.obuf[len(magicChunk)+6] = uint8(checksum >> 16) - w.obuf[len(magicChunk)+7] = uint8(checksum >> 24) - - if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil { - w.err = err - return nRet, err - } - if chunkType == chunkTypeUncompressedData { - if _, err := w.w.Write(uncompressed); err != nil { - w.err = err - return nRet, err - } - } - nRet += len(uncompressed) - } - return nRet, nil -} - -// Flush flushes the Writer to its underlying io.Writer. -func (w *Writer) Flush() error { - if w.err != nil { - return w.err - } - if len(w.ibuf) == 0 { - return nil - } - w.write(w.ibuf) - w.ibuf = w.ibuf[:0] - return w.err -} - -// Close calls Flush and then closes the Writer. -func (w *Writer) Close() error { - w.Flush() - ret := w.err - if w.err == nil { - w.err = errClosed - } - return ret -} diff --git a/vendor/github.com/golang/snappy/encode_amd64.go b/vendor/github.com/golang/snappy/encode_amd64.go deleted file mode 100644 index 150d91bc8..000000000 --- a/vendor/github.com/golang/snappy/encode_amd64.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -package snappy - -// emitLiteral has the same semantics as in encode_other.go. -// -//go:noescape -func emitLiteral(dst, lit []byte) int - -// emitCopy has the same semantics as in encode_other.go. -// -//go:noescape -func emitCopy(dst []byte, offset, length int) int - -// extendMatch has the same semantics as in encode_other.go. -// -//go:noescape -func extendMatch(src []byte, i, j int) int - -// encodeBlock has the same semantics as in encode_other.go. -// -//go:noescape -func encodeBlock(dst, src []byte) (d int) diff --git a/vendor/github.com/golang/snappy/encode_amd64.s b/vendor/github.com/golang/snappy/encode_amd64.s deleted file mode 100644 index adfd979fe..000000000 --- a/vendor/github.com/golang/snappy/encode_amd64.s +++ /dev/null @@ -1,730 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" - -// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a -// Go toolchain regression. See https://github.com/golang/go/issues/15426 and -// https://github.com/golang/snappy/issues/29 -// -// As a workaround, the package was built with a known good assembler, and -// those instructions were disassembled by "objdump -d" to yield the -// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 -// style comments, in AT&T asm syntax. Note that rsp here is a physical -// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm). -// The instructions were then encoded as "BYTE $0x.." sequences, which assemble -// fine on Go 1.6. - -// The asm code generally follows the pure Go code in encode_other.go, except -// where marked with a "!!!". - -// ---------------------------------------------------------------------------- - -// func emitLiteral(dst, lit []byte) int -// -// All local variables fit into registers. The register allocation: -// - AX len(lit) -// - BX n -// - DX return value -// - DI &dst[i] -// - R10 &lit[0] -// -// The 24 bytes of stack space is to call runtime·memmove. -// -// The unusual register allocation of local variables, such as R10 for the -// source pointer, matches the allocation used at the call site in encodeBlock, -// which makes it easier to manually inline this function. -TEXT ·emitLiteral(SB), NOSPLIT, $24-56 - MOVQ dst_base+0(FP), DI - MOVQ lit_base+24(FP), R10 - MOVQ lit_len+32(FP), AX - MOVQ AX, DX - MOVL AX, BX - SUBL $1, BX - - CMPL BX, $60 - JLT oneByte - CMPL BX, $256 - JLT twoBytes - -threeBytes: - MOVB $0xf4, 0(DI) - MOVW BX, 1(DI) - ADDQ $3, DI - ADDQ $3, DX - JMP memmove - -twoBytes: - MOVB $0xf0, 0(DI) - MOVB BX, 1(DI) - ADDQ $2, DI - ADDQ $2, DX - JMP memmove - -oneByte: - SHLB $2, BX - MOVB BX, 0(DI) - ADDQ $1, DI - ADDQ $1, DX - -memmove: - MOVQ DX, ret+48(FP) - - // copy(dst[i:], lit) - // - // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push - // DI, R10 and AX as arguments. - MOVQ DI, 0(SP) - MOVQ R10, 8(SP) - MOVQ AX, 16(SP) - CALL runtime·memmove(SB) - RET - -// ---------------------------------------------------------------------------- - -// func emitCopy(dst []byte, offset, length int) int -// -// All local variables fit into registers. The register allocation: -// - AX length -// - SI &dst[0] -// - DI &dst[i] -// - R11 offset -// -// The unusual register allocation of local variables, such as R11 for the -// offset, matches the allocation used at the call site in encodeBlock, which -// makes it easier to manually inline this function. -TEXT ·emitCopy(SB), NOSPLIT, $0-48 - MOVQ dst_base+0(FP), DI - MOVQ DI, SI - MOVQ offset+24(FP), R11 - MOVQ length+32(FP), AX - -loop0: - // for length >= 68 { etc } - CMPL AX, $68 - JLT step1 - - // Emit a length 64 copy, encoded as 3 bytes. - MOVB $0xfe, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $64, AX - JMP loop0 - -step1: - // if length > 64 { etc } - CMPL AX, $64 - JLE step2 - - // Emit a length 60 copy, encoded as 3 bytes. - MOVB $0xee, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $60, AX - -step2: - // if length >= 12 || offset >= 2048 { goto step3 } - CMPL AX, $12 - JGE step3 - CMPL R11, $2048 - JGE step3 - - // Emit the remaining copy, encoded as 2 bytes. - MOVB R11, 1(DI) - SHRL $8, R11 - SHLB $5, R11 - SUBB $4, AX - SHLB $2, AX - ORB AX, R11 - ORB $1, R11 - MOVB R11, 0(DI) - ADDQ $2, DI - - // Return the number of bytes written. - SUBQ SI, DI - MOVQ DI, ret+40(FP) - RET - -step3: - // Emit the remaining copy, encoded as 3 bytes. - SUBL $1, AX - SHLB $2, AX - ORB $2, AX - MOVB AX, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - - // Return the number of bytes written. - SUBQ SI, DI - MOVQ DI, ret+40(FP) - RET - -// ---------------------------------------------------------------------------- - -// func extendMatch(src []byte, i, j int) int -// -// All local variables fit into registers. The register allocation: -// - DX &src[0] -// - SI &src[j] -// - R13 &src[len(src) - 8] -// - R14 &src[len(src)] -// - R15 &src[i] -// -// The unusual register allocation of local variables, such as R15 for a source -// pointer, matches the allocation used at the call site in encodeBlock, which -// makes it easier to manually inline this function. -TEXT ·extendMatch(SB), NOSPLIT, $0-48 - MOVQ src_base+0(FP), DX - MOVQ src_len+8(FP), R14 - MOVQ i+24(FP), R15 - MOVQ j+32(FP), SI - ADDQ DX, R14 - ADDQ DX, R15 - ADDQ DX, SI - MOVQ R14, R13 - SUBQ $8, R13 - -cmp8: - // As long as we are 8 or more bytes before the end of src, we can load and - // compare 8 bytes at a time. If those 8 bytes are equal, repeat. - CMPQ SI, R13 - JA cmp1 - MOVQ (R15), AX - MOVQ (SI), BX - CMPQ AX, BX - JNE bsf - ADDQ $8, R15 - ADDQ $8, SI - JMP cmp8 - -bsf: - // If those 8 bytes were not equal, XOR the two 8 byte values, and return - // the index of the first byte that differs. The BSF instruction finds the - // least significant 1 bit, the amd64 architecture is little-endian, and - // the shift by 3 converts a bit index to a byte index. - XORQ AX, BX - BSFQ BX, BX - SHRQ $3, BX - ADDQ BX, SI - - // Convert from &src[ret] to ret. - SUBQ DX, SI - MOVQ SI, ret+40(FP) - RET - -cmp1: - // In src's tail, compare 1 byte at a time. - CMPQ SI, R14 - JAE extendMatchEnd - MOVB (R15), AX - MOVB (SI), BX - CMPB AX, BX - JNE extendMatchEnd - ADDQ $1, R15 - ADDQ $1, SI - JMP cmp1 - -extendMatchEnd: - // Convert from &src[ret] to ret. - SUBQ DX, SI - MOVQ SI, ret+40(FP) - RET - -// ---------------------------------------------------------------------------- - -// func encodeBlock(dst, src []byte) (d int) -// -// All local variables fit into registers, other than "var table". The register -// allocation: -// - AX . . -// - BX . . -// - CX 56 shift (note that amd64 shifts by non-immediates must use CX). -// - DX 64 &src[0], tableSize -// - SI 72 &src[s] -// - DI 80 &dst[d] -// - R9 88 sLimit -// - R10 . &src[nextEmit] -// - R11 96 prevHash, currHash, nextHash, offset -// - R12 104 &src[base], skip -// - R13 . &src[nextS], &src[len(src) - 8] -// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x -// - R15 112 candidate -// -// The second column (56, 64, etc) is the stack offset to spill the registers -// when calling other functions. We could pack this slightly tighter, but it's -// simpler to have a dedicated spill map independent of the function called. -// -// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An -// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill -// local variables (registers) during calls gives 32768 + 56 + 64 = 32888. -TEXT ·encodeBlock(SB), 0, $32888-56 - MOVQ dst_base+0(FP), DI - MOVQ src_base+24(FP), SI - MOVQ src_len+32(FP), R14 - - // shift, tableSize := uint32(32-8), 1<<8 - MOVQ $24, CX - MOVQ $256, DX - -calcShift: - // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { - // shift-- - // } - CMPQ DX, $16384 - JGE varTable - CMPQ DX, R14 - JGE varTable - SUBQ $1, CX - SHLQ $1, DX - JMP calcShift - -varTable: - // var table [maxTableSize]uint16 - // - // In the asm code, unlike the Go code, we can zero-initialize only the - // first tableSize elements. Each uint16 element is 2 bytes and each MOVOU - // writes 16 bytes, so we can do only tableSize/8 writes instead of the - // 2048 writes that would zero-initialize all of table's 32768 bytes. - SHRQ $3, DX - LEAQ table-32768(SP), BX - PXOR X0, X0 - -memclr: - MOVOU X0, 0(BX) - ADDQ $16, BX - SUBQ $1, DX - JNZ memclr - - // !!! DX = &src[0] - MOVQ SI, DX - - // sLimit := len(src) - inputMargin - MOVQ R14, R9 - SUBQ $15, R9 - - // !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't - // change for the rest of the function. - MOVQ CX, 56(SP) - MOVQ DX, 64(SP) - MOVQ R9, 88(SP) - - // nextEmit := 0 - MOVQ DX, R10 - - // s := 1 - ADDQ $1, SI - - // nextHash := hash(load32(src, s), shift) - MOVL 0(SI), R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - -outer: - // for { etc } - - // skip := 32 - MOVQ $32, R12 - - // nextS := s - MOVQ SI, R13 - - // candidate := 0 - MOVQ $0, R15 - -inner0: - // for { etc } - - // s := nextS - MOVQ R13, SI - - // bytesBetweenHashLookups := skip >> 5 - MOVQ R12, R14 - SHRQ $5, R14 - - // nextS = s + bytesBetweenHashLookups - ADDQ R14, R13 - - // skip += bytesBetweenHashLookups - ADDQ R14, R12 - - // if nextS > sLimit { goto emitRemainder } - MOVQ R13, AX - SUBQ DX, AX - CMPQ AX, R9 - JA emitRemainder - - // candidate = int(table[nextHash]) - // XXX: MOVWQZX table-32768(SP)(R11*2), R15 - // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 - BYTE $0x4e - BYTE $0x0f - BYTE $0xb7 - BYTE $0x7c - BYTE $0x5c - BYTE $0x78 - - // table[nextHash] = uint16(s) - MOVQ SI, AX - SUBQ DX, AX - - // XXX: MOVW AX, table-32768(SP)(R11*2) - // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) - BYTE $0x66 - BYTE $0x42 - BYTE $0x89 - BYTE $0x44 - BYTE $0x5c - BYTE $0x78 - - // nextHash = hash(load32(src, nextS), shift) - MOVL 0(R13), R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // if load32(src, s) != load32(src, candidate) { continue } break - MOVL 0(SI), AX - MOVL (DX)(R15*1), BX - CMPL AX, BX - JNE inner0 - -fourByteMatch: - // As per the encode_other.go code: - // - // A 4-byte match has been found. We'll later see etc. - - // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment - // on inputMargin in encode.go. - MOVQ SI, AX - SUBQ R10, AX - CMPQ AX, $16 - JLE emitLiteralFastPath - - // ---------------------------------------- - // Begin inline of the emitLiteral call. - // - // d += emitLiteral(dst[d:], src[nextEmit:s]) - - MOVL AX, BX - SUBL $1, BX - - CMPL BX, $60 - JLT inlineEmitLiteralOneByte - CMPL BX, $256 - JLT inlineEmitLiteralTwoBytes - -inlineEmitLiteralThreeBytes: - MOVB $0xf4, 0(DI) - MOVW BX, 1(DI) - ADDQ $3, DI - JMP inlineEmitLiteralMemmove - -inlineEmitLiteralTwoBytes: - MOVB $0xf0, 0(DI) - MOVB BX, 1(DI) - ADDQ $2, DI - JMP inlineEmitLiteralMemmove - -inlineEmitLiteralOneByte: - SHLB $2, BX - MOVB BX, 0(DI) - ADDQ $1, DI - -inlineEmitLiteralMemmove: - // Spill local variables (registers) onto the stack; call; unspill. - // - // copy(dst[i:], lit) - // - // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push - // DI, R10 and AX as arguments. - MOVQ DI, 0(SP) - MOVQ R10, 8(SP) - MOVQ AX, 16(SP) - ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)". - MOVQ SI, 72(SP) - MOVQ DI, 80(SP) - MOVQ R15, 112(SP) - CALL runtime·memmove(SB) - MOVQ 56(SP), CX - MOVQ 64(SP), DX - MOVQ 72(SP), SI - MOVQ 80(SP), DI - MOVQ 88(SP), R9 - MOVQ 112(SP), R15 - JMP inner1 - -inlineEmitLiteralEnd: - // End inline of the emitLiteral call. - // ---------------------------------------- - -emitLiteralFastPath: - // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". - MOVB AX, BX - SUBB $1, BX - SHLB $2, BX - MOVB BX, (DI) - ADDQ $1, DI - - // !!! Implement the copy from lit to dst as a 16-byte load and store. - // (Encode's documentation says that dst and src must not overlap.) - // - // This always copies 16 bytes, instead of only len(lit) bytes, but that's - // OK. Subsequent iterations will fix up the overrun. - // - // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or - // 16-byte loads and stores. This technique probably wouldn't be as - // effective on architectures that are fussier about alignment. - MOVOU 0(R10), X0 - MOVOU X0, 0(DI) - ADDQ AX, DI - -inner1: - // for { etc } - - // base := s - MOVQ SI, R12 - - // !!! offset := base - candidate - MOVQ R12, R11 - SUBQ R15, R11 - SUBQ DX, R11 - - // ---------------------------------------- - // Begin inline of the extendMatch call. - // - // s = extendMatch(src, candidate+4, s+4) - - // !!! R14 = &src[len(src)] - MOVQ src_len+32(FP), R14 - ADDQ DX, R14 - - // !!! R13 = &src[len(src) - 8] - MOVQ R14, R13 - SUBQ $8, R13 - - // !!! R15 = &src[candidate + 4] - ADDQ $4, R15 - ADDQ DX, R15 - - // !!! s += 4 - ADDQ $4, SI - -inlineExtendMatchCmp8: - // As long as we are 8 or more bytes before the end of src, we can load and - // compare 8 bytes at a time. If those 8 bytes are equal, repeat. - CMPQ SI, R13 - JA inlineExtendMatchCmp1 - MOVQ (R15), AX - MOVQ (SI), BX - CMPQ AX, BX - JNE inlineExtendMatchBSF - ADDQ $8, R15 - ADDQ $8, SI - JMP inlineExtendMatchCmp8 - -inlineExtendMatchBSF: - // If those 8 bytes were not equal, XOR the two 8 byte values, and return - // the index of the first byte that differs. The BSF instruction finds the - // least significant 1 bit, the amd64 architecture is little-endian, and - // the shift by 3 converts a bit index to a byte index. - XORQ AX, BX - BSFQ BX, BX - SHRQ $3, BX - ADDQ BX, SI - JMP inlineExtendMatchEnd - -inlineExtendMatchCmp1: - // In src's tail, compare 1 byte at a time. - CMPQ SI, R14 - JAE inlineExtendMatchEnd - MOVB (R15), AX - MOVB (SI), BX - CMPB AX, BX - JNE inlineExtendMatchEnd - ADDQ $1, R15 - ADDQ $1, SI - JMP inlineExtendMatchCmp1 - -inlineExtendMatchEnd: - // End inline of the extendMatch call. - // ---------------------------------------- - - // ---------------------------------------- - // Begin inline of the emitCopy call. - // - // d += emitCopy(dst[d:], base-candidate, s-base) - - // !!! length := s - base - MOVQ SI, AX - SUBQ R12, AX - -inlineEmitCopyLoop0: - // for length >= 68 { etc } - CMPL AX, $68 - JLT inlineEmitCopyStep1 - - // Emit a length 64 copy, encoded as 3 bytes. - MOVB $0xfe, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $64, AX - JMP inlineEmitCopyLoop0 - -inlineEmitCopyStep1: - // if length > 64 { etc } - CMPL AX, $64 - JLE inlineEmitCopyStep2 - - // Emit a length 60 copy, encoded as 3 bytes. - MOVB $0xee, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $60, AX - -inlineEmitCopyStep2: - // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } - CMPL AX, $12 - JGE inlineEmitCopyStep3 - CMPL R11, $2048 - JGE inlineEmitCopyStep3 - - // Emit the remaining copy, encoded as 2 bytes. - MOVB R11, 1(DI) - SHRL $8, R11 - SHLB $5, R11 - SUBB $4, AX - SHLB $2, AX - ORB AX, R11 - ORB $1, R11 - MOVB R11, 0(DI) - ADDQ $2, DI - JMP inlineEmitCopyEnd - -inlineEmitCopyStep3: - // Emit the remaining copy, encoded as 3 bytes. - SUBL $1, AX - SHLB $2, AX - ORB $2, AX - MOVB AX, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - -inlineEmitCopyEnd: - // End inline of the emitCopy call. - // ---------------------------------------- - - // nextEmit = s - MOVQ SI, R10 - - // if s >= sLimit { goto emitRemainder } - MOVQ SI, AX - SUBQ DX, AX - CMPQ AX, R9 - JAE emitRemainder - - // As per the encode_other.go code: - // - // We could immediately etc. - - // x := load64(src, s-1) - MOVQ -1(SI), R14 - - // prevHash := hash(uint32(x>>0), shift) - MOVL R14, R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // table[prevHash] = uint16(s-1) - MOVQ SI, AX - SUBQ DX, AX - SUBQ $1, AX - - // XXX: MOVW AX, table-32768(SP)(R11*2) - // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) - BYTE $0x66 - BYTE $0x42 - BYTE $0x89 - BYTE $0x44 - BYTE $0x5c - BYTE $0x78 - - // currHash := hash(uint32(x>>8), shift) - SHRQ $8, R14 - MOVL R14, R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // candidate = int(table[currHash]) - // XXX: MOVWQZX table-32768(SP)(R11*2), R15 - // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 - BYTE $0x4e - BYTE $0x0f - BYTE $0xb7 - BYTE $0x7c - BYTE $0x5c - BYTE $0x78 - - // table[currHash] = uint16(s) - ADDQ $1, AX - - // XXX: MOVW AX, table-32768(SP)(R11*2) - // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) - BYTE $0x66 - BYTE $0x42 - BYTE $0x89 - BYTE $0x44 - BYTE $0x5c - BYTE $0x78 - - // if uint32(x>>8) == load32(src, candidate) { continue } - MOVL (DX)(R15*1), BX - CMPL R14, BX - JEQ inner1 - - // nextHash = hash(uint32(x>>16), shift) - SHRQ $8, R14 - MOVL R14, R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // s++ - ADDQ $1, SI - - // break out of the inner1 for loop, i.e. continue the outer loop. - JMP outer - -emitRemainder: - // if nextEmit < len(src) { etc } - MOVQ src_len+32(FP), AX - ADDQ DX, AX - CMPQ R10, AX - JEQ encodeBlockEnd - - // d += emitLiteral(dst[d:], src[nextEmit:]) - // - // Push args. - MOVQ DI, 0(SP) - MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative. - MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative. - MOVQ R10, 24(SP) - SUBQ R10, AX - MOVQ AX, 32(SP) - MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative. - - // Spill local variables (registers) onto the stack; call; unspill. - MOVQ DI, 80(SP) - CALL ·emitLiteral(SB) - MOVQ 80(SP), DI - - // Finish the "d +=" part of "d += emitLiteral(etc)". - ADDQ 48(SP), DI - -encodeBlockEnd: - MOVQ dst_base+0(FP), AX - SUBQ AX, DI - MOVQ DI, d+48(FP) - RET diff --git a/vendor/github.com/golang/snappy/encode_other.go b/vendor/github.com/golang/snappy/encode_other.go deleted file mode 100644 index dbcae905e..000000000 --- a/vendor/github.com/golang/snappy/encode_other.go +++ /dev/null @@ -1,238 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !amd64 appengine !gc noasm - -package snappy - -func load32(b []byte, i int) uint32 { - b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 -} - -func load64(b []byte, i int) uint64 { - b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 -} - -// emitLiteral writes a literal chunk and returns the number of bytes written. -// -// It assumes that: -// dst is long enough to hold the encoded bytes -// 1 <= len(lit) && len(lit) <= 65536 -func emitLiteral(dst, lit []byte) int { - i, n := 0, uint(len(lit)-1) - switch { - case n < 60: - dst[0] = uint8(n)<<2 | tagLiteral - i = 1 - case n < 1<<8: - dst[0] = 60<<2 | tagLiteral - dst[1] = uint8(n) - i = 2 - default: - dst[0] = 61<<2 | tagLiteral - dst[1] = uint8(n) - dst[2] = uint8(n >> 8) - i = 3 - } - return i + copy(dst[i:], lit) -} - -// emitCopy writes a copy chunk and returns the number of bytes written. -// -// It assumes that: -// dst is long enough to hold the encoded bytes -// 1 <= offset && offset <= 65535 -// 4 <= length && length <= 65535 -func emitCopy(dst []byte, offset, length int) int { - i := 0 - // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The - // threshold for this loop is a little higher (at 68 = 64 + 4), and the - // length emitted down below is is a little lower (at 60 = 64 - 4), because - // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed - // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as - // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as - // 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a - // tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an - // encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1. - for length >= 68 { - // Emit a length 64 copy, encoded as 3 bytes. - dst[i+0] = 63<<2 | tagCopy2 - dst[i+1] = uint8(offset) - dst[i+2] = uint8(offset >> 8) - i += 3 - length -= 64 - } - if length > 64 { - // Emit a length 60 copy, encoded as 3 bytes. - dst[i+0] = 59<<2 | tagCopy2 - dst[i+1] = uint8(offset) - dst[i+2] = uint8(offset >> 8) - i += 3 - length -= 60 - } - if length >= 12 || offset >= 2048 { - // Emit the remaining copy, encoded as 3 bytes. - dst[i+0] = uint8(length-1)<<2 | tagCopy2 - dst[i+1] = uint8(offset) - dst[i+2] = uint8(offset >> 8) - return i + 3 - } - // Emit the remaining copy, encoded as 2 bytes. - dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 - dst[i+1] = uint8(offset) - return i + 2 -} - -// extendMatch returns the largest k such that k <= len(src) and that -// src[i:i+k-j] and src[j:k] have the same contents. -// -// It assumes that: -// 0 <= i && i < j && j <= len(src) -func extendMatch(src []byte, i, j int) int { - for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 { - } - return j -} - -func hash(u, shift uint32) uint32 { - return (u * 0x1e35a7bd) >> shift -} - -// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It -// assumes that the varint-encoded length of the decompressed bytes has already -// been written. -// -// It also assumes that: -// len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize -func encodeBlock(dst, src []byte) (d int) { - // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. - // The table element type is uint16, as s < sLimit and sLimit < len(src) - // and len(src) <= maxBlockSize and maxBlockSize == 65536. - const ( - maxTableSize = 1 << 14 - // tableMask is redundant, but helps the compiler eliminate bounds - // checks. - tableMask = maxTableSize - 1 - ) - shift := uint32(32 - 8) - for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { - shift-- - } - // In Go, all array elements are zero-initialized, so there is no advantage - // to a smaller tableSize per se. However, it matches the C++ algorithm, - // and in the asm versions of this code, we can get away with zeroing only - // the first tableSize elements. - var table [maxTableSize]uint16 - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := len(src) - inputMargin - - // nextEmit is where in src the next emitLiteral should start from. - nextEmit := 0 - - // The encoded form must start with a literal, as there are no previous - // bytes to copy, so we start looking for hash matches at s == 1. - s := 1 - nextHash := hash(load32(src, s), shift) - - for { - // Copied from the C++ snappy implementation: - // - // Heuristic match skipping: If 32 bytes are scanned with no matches - // found, start looking only at every other byte. If 32 more bytes are - // scanned (or skipped), look at every third byte, etc.. When a match - // is found, immediately go back to looking at every byte. This is a - // small loss (~5% performance, ~0.1% density) for compressible data - // due to more bookkeeping, but for non-compressible data (such as - // JPEG) it's a huge win since the compressor quickly "realizes" the - // data is incompressible and doesn't bother looking for matches - // everywhere. - // - // The "skip" variable keeps track of how many bytes there are since - // the last match; dividing it by 32 (ie. right-shifting by five) gives - // the number of bytes to move ahead for each iteration. - skip := 32 - - nextS := s - candidate := 0 - for { - s = nextS - bytesBetweenHashLookups := skip >> 5 - nextS = s + bytesBetweenHashLookups - skip += bytesBetweenHashLookups - if nextS > sLimit { - goto emitRemainder - } - candidate = int(table[nextHash&tableMask]) - table[nextHash&tableMask] = uint16(s) - nextHash = hash(load32(src, nextS), shift) - if load32(src, s) == load32(src, candidate) { - break - } - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - d += emitLiteral(dst[d:], src[nextEmit:s]) - - // Call emitCopy, and then see if another emitCopy could be our next - // move. Repeat until we find no match for the input immediately after - // what was consumed by the last emitCopy call. - // - // If we exit this loop normally then we need to call emitLiteral next, - // though we don't yet know how big the literal will be. We handle that - // by proceeding to the next iteration of the main loop. We also can - // exit this loop via goto if we get close to exhausting the input. - for { - // Invariant: we have a 4-byte match at s, and no need to emit any - // literal bytes prior to s. - base := s - - // Extend the 4-byte match as long as possible. - // - // This is an inlined version of: - // s = extendMatch(src, candidate+4, s+4) - s += 4 - for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 { - } - - d += emitCopy(dst[d:], base-candidate, s-base) - nextEmit = s - if s >= sLimit { - goto emitRemainder - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-1 and at s. If - // another emitCopy is not our next move, also calculate nextHash - // at s+1. At least on GOARCH=amd64, these three hash calculations - // are faster as one load64 call (with some shifts) instead of - // three load32 calls. - x := load64(src, s-1) - prevHash := hash(uint32(x>>0), shift) - table[prevHash&tableMask] = uint16(s - 1) - currHash := hash(uint32(x>>8), shift) - candidate = int(table[currHash&tableMask]) - table[currHash&tableMask] = uint16(s) - if uint32(x>>8) != load32(src, candidate) { - nextHash = hash(uint32(x>>16), shift) - s++ - break - } - } - } - -emitRemainder: - if nextEmit < len(src) { - d += emitLiteral(dst[d:], src[nextEmit:]) - } - return d -} diff --git a/vendor/github.com/golang/snappy/snappy.go b/vendor/github.com/golang/snappy/snappy.go deleted file mode 100644 index ece692ea4..000000000 --- a/vendor/github.com/golang/snappy/snappy.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2011 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package snappy implements the Snappy compression format. It aims for very -// high speeds and reasonable compression. -// -// There are actually two Snappy formats: block and stream. They are related, -// but different: trying to decompress block-compressed data as a Snappy stream -// will fail, and vice versa. The block format is the Decode and Encode -// functions and the stream format is the Reader and Writer types. -// -// The block format, the more common case, is used when the complete size (the -// number of bytes) of the original data is known upfront, at the time -// compression starts. The stream format, also known as the framing format, is -// for when that isn't always true. -// -// The canonical, C++ implementation is at https://github.com/google/snappy and -// it only implements the block format. -package snappy // import "github.com/golang/snappy" - -import ( - "hash/crc32" -) - -/* -Each encoded block begins with the varint-encoded length of the decoded data, -followed by a sequence of chunks. Chunks begin and end on byte boundaries. The -first byte of each chunk is broken into its 2 least and 6 most significant bits -called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. -Zero means a literal tag. All other values mean a copy tag. - -For literal tags: - - If m < 60, the next 1 + m bytes are literal bytes. - - Otherwise, let n be the little-endian unsigned integer denoted by the next - m - 59 bytes. The next 1 + n bytes after that are literal bytes. - -For copy tags, length bytes are copied from offset bytes ago, in the style of -Lempel-Ziv compression algorithms. In particular: - - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). - The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 - of the offset. The next byte is bits 0-7 of the offset. - - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). - The length is 1 + m. The offset is the little-endian unsigned integer - denoted by the next 2 bytes. - - For l == 3, this tag is a legacy format that is no longer issued by most - encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in - [1, 65). The length is 1 + m. The offset is the little-endian unsigned - integer denoted by the next 4 bytes. -*/ -const ( - tagLiteral = 0x00 - tagCopy1 = 0x01 - tagCopy2 = 0x02 - tagCopy4 = 0x03 -) - -const ( - checksumSize = 4 - chunkHeaderSize = 4 - magicChunk = "\xff\x06\x00\x00" + magicBody - magicBody = "sNaPpY" - - // maxBlockSize is the maximum size of the input to encodeBlock. It is not - // part of the wire format per se, but some parts of the encoder assume - // that an offset fits into a uint16. - // - // Also, for the framing format (Writer type instead of Encode function), - // https://github.com/google/snappy/blob/master/framing_format.txt says - // that "the uncompressed data in a chunk must be no longer than 65536 - // bytes". - maxBlockSize = 65536 - - // maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is - // hard coded to be a const instead of a variable, so that obufLen can also - // be a const. Their equivalence is confirmed by - // TestMaxEncodedLenOfMaxBlockSize. - maxEncodedLenOfMaxBlockSize = 76490 - - obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize - obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize -) - -const ( - chunkTypeCompressedData = 0x00 - chunkTypeUncompressedData = 0x01 - chunkTypePadding = 0xfe - chunkTypeStreamIdentifier = 0xff -) - -var crcTable = crc32.MakeTable(crc32.Castagnoli) - -// crc implements the checksum specified in section 3 of -// https://github.com/google/snappy/blob/master/framing_format.txt -func crc(b []byte) uint32 { - c := crc32.Update(0, crcTable, b) - return uint32(c>>15|c<<17) + 0xa282ead8 -} diff --git a/vendor/github.com/id01/go-lz4/.gitignore b/vendor/github.com/id01/go-lz4/.gitignore deleted file mode 100644 index be64db617..000000000 --- a/vendor/github.com/id01/go-lz4/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/lz4-example/lz4-example diff --git a/vendor/github.com/id01/go-lz4/.travis.yml b/vendor/github.com/id01/go-lz4/.travis.yml deleted file mode 100644 index d5870798f..000000000 --- a/vendor/github.com/id01/go-lz4/.travis.yml +++ /dev/null @@ -1,9 +0,0 @@ -language: go - -go: - - 1.1 - - 1.2 - - 1.3 - - 1.4 - - 1.5 - - tip diff --git a/vendor/github.com/id01/go-lz4/LICENSE b/vendor/github.com/id01/go-lz4/LICENSE deleted file mode 100644 index 0545977b7..000000000 --- a/vendor/github.com/id01/go-lz4/LICENSE +++ /dev/null @@ -1,24 +0,0 @@ -Copyright 2011-2012 Branimir Karadzic. All rights reserved. -Copyright 2013 Damian Gryski. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/vendor/github.com/id01/go-lz4/README.md b/vendor/github.com/id01/go-lz4/README.md deleted file mode 100644 index fd65d1590..000000000 --- a/vendor/github.com/id01/go-lz4/README.md +++ /dev/null @@ -1,76 +0,0 @@ -go-lz4 -====== - -go-lz4 is port of LZ4 lossless compression algorithm to Go. The original C code -is located at: - -https://github.com/Cyan4973/lz4 - -Status ------- -[![Build Status](https://secure.travis-ci.org/bkaradzic/go-lz4.png)](http://travis-ci.org/bkaradzic/go-lz4) -[![GoDoc](https://godoc.org/github.com/bkaradzic/go-lz4?status.png)](https://godoc.org/github.com/bkaradzic/go-lz4) - -Usage ------ - - go get github.com/bkaradzic/go-lz4 - - import "github.com/bkaradzic/go-lz4" - -The package name is `lz4` - -Notes ------ - -* go-lz4 saves a uint32 with the original uncompressed length at the beginning - of the encoded buffer. They may get in the way of interoperability with - other implementations. - -Alternative ------------ - -https://github.com/pierrec/lz4 - -Contributors ------------- - -Damian Gryski ([@dgryski](https://github.com/dgryski)) -Dustin Sallings ([@dustin](https://github.com/dustin)) - -Contact -------- - -[@bkaradzic](https://twitter.com/bkaradzic) -http://www.stuckingeometry.com - -Project page -https://github.com/bkaradzic/go-lz4 - -License -------- - -Copyright 2011-2012 Branimir Karadzic. All rights reserved. -Copyright 2013 Damian Gryski. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/vendor/github.com/id01/go-lz4/fuzz.go b/vendor/github.com/id01/go-lz4/fuzz.go deleted file mode 100644 index e4989de9f..000000000 --- a/vendor/github.com/id01/go-lz4/fuzz.go +++ /dev/null @@ -1,23 +0,0 @@ -// +build gofuzz - -package lz4 - -import "encoding/binary" - -func Fuzz(data []byte) int { - - if len(data) < 4 { - return 0 - } - - ln := binary.LittleEndian.Uint32(data) - if ln > (1 << 21) { - return 0 - } - - if _, err := Decode(nil, data); err != nil { - return 0 - } - - return 1 -} diff --git a/vendor/github.com/id01/go-lz4/reader.go b/vendor/github.com/id01/go-lz4/reader.go deleted file mode 100644 index 254745a37..000000000 --- a/vendor/github.com/id01/go-lz4/reader.go +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright 2011-2012 Branimir Karadzic. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -package lz4 - -import ( - "encoding/binary" - "errors" - "io" -) - -var ( - // ErrCorrupt indicates the input was corrupt - ErrCorrupt = errors.New("corrupt input") - ErrOutOfMemory = errors.New("out of memory") // Indicates dst wasn't large enough -) - -const ( - mlBits = 4 - mlMask = (1 << mlBits) - 1 - runBits = 8 - mlBits - runMask = (1 << runBits) - 1 -) - -type decoder struct { - src []byte - dst []byte - spos uint32 - dpos uint32 - ref uint32 -} - -func (d *decoder) readByte() (uint8, error) { - if int(d.spos) == len(d.src) { - return 0, io.EOF - } - b := d.src[d.spos] - d.spos++ - return b, nil -} - -func (d *decoder) getLen() (uint32, error) { - - length := uint32(0) - ln, err := d.readByte() - if err != nil { - return 0, ErrCorrupt - } - for ln == 255 { - length += 255 - ln, err = d.readByte() - if err != nil { - return 0, ErrCorrupt - } - } - length += uint32(ln) - - return length, nil -} - -func (d *decoder) cp(length, decr uint32) { - - if int(d.ref+length) < int(d.dpos) { - copy(d.dst[d.dpos:], d.dst[d.ref:d.ref+length]) - } else { - for ii := uint32(0); ii < length; ii++ { - d.dst[d.dpos+ii] = d.dst[d.ref+ii] - } - } - d.dpos += length - d.ref += length - decr -} - -func (d *decoder) finish(err error) error { - if err == io.EOF { - return nil - } - - return err -} - -// Decode returns the decoded form of src. The returned slice is a subslice of dst. -// Must have an input of dst with large enough length to hold the decompressed buffer. -func Decode(dst, src []byte) ([]byte, error) { - - if len(src) < 4 { - return nil, ErrCorrupt - } - - compressedLen := binary.LittleEndian.Uint32(src) - - if compressedLen == 0 { - return nil, ErrCorrupt - } - - if compressedLen > MaxInputSize { - return nil, ErrTooLarge - } - - d := decoder{src: src, dst: dst, spos: 4} - - decr := []uint32{0, 3, 2, 3} - - for { - code, err := d.readByte() - if err != nil { - return d.dst[:d.dpos], d.finish(err) - } - - length := uint32(code >> mlBits) - if length == runMask { - ln, err := d.getLen() - if err != nil { - return nil, ErrCorrupt - } - length += ln - } - - if int(d.spos+length) > len(d.src) || int(d.dpos+length) > len(d.dst) { - return nil, ErrOutOfMemory - } - - for ii := uint32(0); ii < length; ii++ { - d.dst[d.dpos+ii] = d.src[d.spos+ii] - } - - d.spos += length - d.dpos += length - - if int(d.spos) == len(d.src) { - return d.dst[:d.dpos], nil - } - - if int(d.spos+2) >= len(d.src) { - return nil, ErrCorrupt - } - - back := uint32(d.src[d.spos]) | uint32(d.src[d.spos+1])<<8 - - if back > d.dpos { - return nil, ErrCorrupt - } - - d.spos += 2 - d.ref = d.dpos - back - - length = uint32(code & mlMask) - if length == mlMask { - ln, err := d.getLen() - if err != nil { - return nil, ErrCorrupt - } - length += ln - } - - literal := d.dpos - d.ref - - if literal < 4 { - if int(d.dpos+4) > len(d.dst) { - return nil, ErrOutOfMemory - } - - d.cp(4, decr[literal]) - } else { - length += 4 - } - - if d.dpos+length > uint32(len(d.dst)) { - return nil, ErrOutOfMemory - } - - d.cp(length, 0) - } -} diff --git a/vendor/github.com/id01/go-lz4/writer.go b/vendor/github.com/id01/go-lz4/writer.go deleted file mode 100644 index c69563919..000000000 --- a/vendor/github.com/id01/go-lz4/writer.go +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright 2011-2012 Branimir Karadzic. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -package lz4 - -import ( - "encoding/binary" - "errors" -) - -const ( - minMatch = 4 - hashLog = 17 - hashTableSize = 1 << hashLog - hashShift = (minMatch * 8) - hashLog - incompressible uint32 = 128 - uninitHash = 0x88888888 - - // MaxInputSize is the largest buffer than can be compressed in a single block - MaxInputSize = 0x7E000000 -) - -var ( - // ErrTooLarge indicates the input buffer was too large - ErrTooLarge = errors.New("input too large") -) - -type encoder struct { - src []byte - dst []byte - hashTable []uint32 - pos uint32 - anchor uint32 - dpos uint32 -} - -// CompressBound returns the maximum length of a lz4 block, given it's uncompressed length -func CompressBound(isize int) int { - if isize > MaxInputSize { - return 0 - } - return isize + ((isize) / 255) + 16 + 4 -} - -func (e *encoder) writeLiterals(length, mlLen, pos uint32) { - - ln := length - - var code byte - if ln > runMask-1 { - code = runMask - } else { - code = byte(ln) - } - - if mlLen > mlMask-1 { - e.dst[e.dpos] = (code << mlBits) + byte(mlMask) - } else { - e.dst[e.dpos] = (code << mlBits) + byte(mlLen) - } - e.dpos++ - - if code == runMask { - ln -= runMask - for ; ln > 254; ln -= 255 { - e.dst[e.dpos] = 255 - e.dpos++ - } - - e.dst[e.dpos] = byte(ln) - e.dpos++ - } - - for ii := uint32(0); ii < length; ii++ { - e.dst[e.dpos+ii] = e.src[pos+ii] - } - - e.dpos += length -} - -// Encode returns the encoded form of src. The returned array may be a -// sub-slice of dst if it was large enough to hold the entire output. -func Encode(dst, src []byte) ([]byte, error) { - - if len(src) >= MaxInputSize { - return nil, ErrTooLarge - } - - if n := CompressBound(len(src)); len(dst) < n { - dst = make([]byte, n) - } - - e := encoder{src: src, dst: dst, hashTable: make([]uint32, hashTableSize)} - - e.dpos = 4 - - var ( - step uint32 = 1 - limit = incompressible - ) - - for { - if int(e.pos)+12 >= len(e.src) { - e.writeLiterals(uint32(len(e.src))-e.anchor, 0, e.anchor) - binary.LittleEndian.PutUint32(dst, uint32(e.dpos-4)) // Subtract 4 because the compressed size isn't counted as part of the block - return e.dst[:e.dpos], nil - } - - sequence := uint32(e.src[e.pos+3])<<24 | uint32(e.src[e.pos+2])<<16 | uint32(e.src[e.pos+1])<<8 | uint32(e.src[e.pos+0]) - - hash := (sequence * 2654435761) >> hashShift - ref := e.hashTable[hash] + uninitHash - e.hashTable[hash] = e.pos - uninitHash - - if ((e.pos-ref)>>16) != 0 || uint32(e.src[ref+3])<<24|uint32(e.src[ref+2])<<16|uint32(e.src[ref+1])<<8|uint32(e.src[ref+0]) != sequence { - if e.pos-e.anchor > limit { - limit <<= 1 - step += 1 + (step >> 2) - } - e.pos += step - continue - } - - if step > 1 { - e.hashTable[hash] = ref - uninitHash - e.pos -= step - 1 - step = 1 - continue - } - limit = incompressible - - ln := e.pos - e.anchor - back := e.pos - ref - - anchor := e.anchor - - e.pos += minMatch - ref += minMatch - e.anchor = e.pos - - for int(e.pos) < len(e.src)-5 && e.src[e.pos] == e.src[ref] { - e.pos++ - ref++ - } - - mlLen := e.pos - e.anchor - - e.writeLiterals(ln, mlLen, anchor) - e.dst[e.dpos] = uint8(back) - e.dst[e.dpos+1] = uint8(back >> 8) - e.dpos += 2 - - if mlLen > mlMask-1 { - mlLen -= mlMask - for mlLen > 254 { - mlLen -= 255 - - e.dst[e.dpos] = 255 - e.dpos++ - } - - e.dst[e.dpos] = byte(mlLen) - e.dpos++ - } - - e.anchor = e.pos - } -} diff --git a/vendor/github.com/golang/snappy/LICENSE b/vendor/github.com/klauspost/compress/LICENSE similarity index 92% rename from vendor/github.com/golang/snappy/LICENSE rename to vendor/github.com/klauspost/compress/LICENSE index 6050c10f4..1eb75ef68 100644 --- a/vendor/github.com/golang/snappy/LICENSE +++ b/vendor/github.com/klauspost/compress/LICENSE @@ -1,4 +1,5 @@ -Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. +Copyright (c) 2012 The Go Authors. All rights reserved. +Copyright (c) 2019 Klaus Post. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go new file mode 100644 index 000000000..3f4d76b6a --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -0,0 +1,817 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright (c) 2015 Klaus Post +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "fmt" + "io" + "math" +) + +const ( + NoCompression = 0 + BestSpeed = 1 + BestCompression = 9 + DefaultCompression = -1 + + // HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman + // entropy encoding. This mode is useful in compressing data that has + // already been compressed with an LZ style algorithm (e.g. Snappy or LZ4) + // that lacks an entropy encoder. Compression gains are achieved when + // certain bytes in the input stream occur more frequently than others. + // + // Note that HuffmanOnly produces a compressed output that is + // RFC 1951 compliant. That is, any valid DEFLATE decompressor will + // continue to be able to decompress this output. + HuffmanOnly = -2 + ConstantCompression = HuffmanOnly // compatibility alias. + + logWindowSize = 15 + windowSize = 1 << logWindowSize + windowMask = windowSize - 1 + logMaxOffsetSize = 15 // Standard DEFLATE + minMatchLength = 4 // The smallest match that the compressor looks for + maxMatchLength = 258 // The longest match for the compressor + minOffsetSize = 1 // The shortest offset that makes any sense + + // The maximum number of tokens we put into a single flat block, just too + // stop things from getting too large. + maxFlateBlockTokens = 1 << 14 + maxStoreBlockSize = 65535 + hashBits = 17 // After 17 performance degrades + hashSize = 1 << hashBits + hashMask = (1 << hashBits) - 1 + hashShift = (hashBits + minMatchLength - 1) / minMatchLength + maxHashOffset = 1 << 24 + + skipNever = math.MaxInt32 +) + +type compressionLevel struct { + good, lazy, nice, chain, fastSkipHashing, level int +} + +// Compression levels have been rebalanced from zlib deflate defaults +// to give a bigger spread in speed and compression. +// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/ +var levels = []compressionLevel{ + {}, // 0 + // Level 1-6 uses specialized algorithm - values not used + {0, 0, 0, 0, 0, 1}, + {0, 0, 0, 0, 0, 2}, + {0, 0, 0, 0, 0, 3}, + {0, 0, 0, 0, 0, 4}, + {0, 0, 0, 0, 0, 5}, + {0, 0, 0, 0, 0, 6}, + // Levels 7-9 use increasingly more lazy matching + // and increasingly stringent conditions for "good enough". + {8, 8, 24, 16, skipNever, 7}, + {10, 16, 24, 64, skipNever, 8}, + {32, 258, 258, 4096, skipNever, 9}, +} + +// advancedState contains state for the advanced levels, with bigger hash tables, etc. +type advancedState struct { + // deflate state + length int + offset int + hash uint32 + maxInsertIndex int + ii uint16 // position of last match, intended to overflow to reset. + + // Input hash chains + // hashHead[hashValue] contains the largest inputIndex with the specified hash value + // If hashHead[hashValue] is within the current window, then + // hashPrev[hashHead[hashValue] & windowMask] contains the previous index + // with the same hash value. + chainHead int + hashHead [hashSize]uint32 + hashPrev [windowSize]uint32 + hashOffset int + + // input window: unprocessed data is window[index:windowEnd] + index int + hashMatch [maxMatchLength + minMatchLength]uint32 +} + +type compressor struct { + compressionLevel + + w *huffmanBitWriter + + // compression algorithm + fill func(*compressor, []byte) int // copy data to window + step func(*compressor) // process window + sync bool // requesting flush + + window []byte + windowEnd int + blockStart int // window index where current tokens start + byteAvailable bool // if true, still need to process window[index-1]. + err error + + // queued output tokens + tokens tokens + fast fastEnc + state *advancedState +} + +func (d *compressor) fillDeflate(b []byte) int { + s := d.state + if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) { + // shift the window by windowSize + copy(d.window[:], d.window[windowSize:2*windowSize]) + s.index -= windowSize + d.windowEnd -= windowSize + if d.blockStart >= windowSize { + d.blockStart -= windowSize + } else { + d.blockStart = math.MaxInt32 + } + s.hashOffset += windowSize + if s.hashOffset > maxHashOffset { + delta := s.hashOffset - 1 + s.hashOffset -= delta + s.chainHead -= delta + // Iterate over slices instead of arrays to avoid copying + // the entire table onto the stack (Issue #18625). + for i, v := range s.hashPrev[:] { + if int(v) > delta { + s.hashPrev[i] = uint32(int(v) - delta) + } else { + s.hashPrev[i] = 0 + } + } + for i, v := range s.hashHead[:] { + if int(v) > delta { + s.hashHead[i] = uint32(int(v) - delta) + } else { + s.hashHead[i] = 0 + } + } + } + } + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error { + if index > 0 || eof { + var window []byte + if d.blockStart <= index { + window = d.window[d.blockStart:index] + } + d.blockStart = index + d.w.writeBlock(tok, eof, window) + return d.w.err + } + return nil +} + +// writeBlockSkip writes the current block and uses the number of tokens +// to determine if the block should be stored on no matches, or +// only huffman encoded. +func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error { + if index > 0 || eof { + if d.blockStart <= index { + window := d.window[d.blockStart:index] + // If we removed less than a 64th of all literals + // we huffman compress the block. + if int(tok.n) > len(window)-int(tok.n>>6) { + d.w.writeBlockHuff(eof, window, d.sync) + } else { + // Write a dynamic huffman block. + d.w.writeBlockDynamic(tok, eof, window, d.sync) + } + } else { + d.w.writeBlock(tok, eof, nil) + } + d.blockStart = index + return d.w.err + } + return nil +} + +// fillWindow will fill the current window with the supplied +// dictionary and calculate all hashes. +// This is much faster than doing a full encode. +// Should only be used after a start/reset. +func (d *compressor) fillWindow(b []byte) { + // Do not fill window if we are in store-only or huffman mode. + if d.level <= 0 { + return + } + if d.fast != nil { + // encode the last data, but discard the result + if len(b) > maxMatchOffset { + b = b[len(b)-maxMatchOffset:] + } + d.fast.Encode(&d.tokens, b) + d.tokens.Reset() + return + } + s := d.state + // If we are given too much, cut it. + if len(b) > windowSize { + b = b[len(b)-windowSize:] + } + // Add all to window. + n := copy(d.window[d.windowEnd:], b) + + // Calculate 256 hashes at the time (more L1 cache hits) + loops := (n + 256 - minMatchLength) / 256 + for j := 0; j < loops; j++ { + startindex := j * 256 + end := startindex + 256 + minMatchLength - 1 + if end > n { + end = n + } + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + + if dstSize <= 0 { + continue + } + + dst := s.hashMatch[:dstSize] + bulkHash4(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + s.hashPrev[di&windowMask] = s.hashHead[newH] + // Set the head of the hash chain to us. + s.hashHead[newH] = uint32(di + s.hashOffset) + } + s.hash = newH + } + // Update window information. + d.windowEnd += n + s.index = n +} + +// Try to find a match starting at index whose length is greater than prevSize. +// We only look at chainCount possibilities before giving up. +// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead +func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) { + minMatchLook := maxMatchLength + if lookahead < minMatchLook { + minMatchLook = lookahead + } + + win := d.window[0 : pos+minMatchLook] + + // We quit when we get a match that's at least nice long + nice := len(win) - pos + if d.nice < nice { + nice = d.nice + } + + // If we've got a match that's good enough, only look in 1/4 the chain. + tries := d.chain + length = prevLength + if length >= d.good { + tries >>= 2 + } + + wEnd := win[pos+length] + wPos := win[pos:] + minIndex := pos - windowSize + + for i := prevHead; tries > 0; tries-- { + if wEnd == win[i+length] { + n := matchLen(win[i:i+minMatchLook], wPos) + + if n > length && (n > minMatchLength || pos-i <= 4096) { + length = n + offset = pos - i + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + if i == minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset + if i < minIndex || i < 0 { + break + } + } + return +} + +func (d *compressor) writeStoredBlock(buf []byte) error { + if d.w.writeStoredHeader(len(buf), false); d.w.err != nil { + return d.w.err + } + d.w.writeBytes(buf) + return d.w.err +} + +// hash4 returns a hash representation of the first 4 bytes +// of the supplied slice. +// The caller must ensure that len(b) >= 4. +func hash4(b []byte) uint32 { + b = b[:4] + return hash4u(uint32(b[3])|uint32(b[2])<<8|uint32(b[1])<<16|uint32(b[0])<<24, hashBits) +} + +// bulkHash4 will compute hashes using the same +// algorithm as hash4 +func bulkHash4(b []byte, dst []uint32) { + if len(b) < 4 { + return + } + hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24 + dst[0] = hash4u(hb, hashBits) + end := len(b) - 4 + 1 + for i := 1; i < end; i++ { + hb = (hb << 8) | uint32(b[i+3]) + dst[i] = hash4u(hb, hashBits) + } +} + +func (d *compressor) initDeflate() { + d.window = make([]byte, 2*windowSize) + d.byteAvailable = false + d.err = nil + if d.state == nil { + return + } + s := d.state + s.index = 0 + s.hashOffset = 1 + s.length = minMatchLength - 1 + s.offset = 0 + s.hash = 0 + s.chainHead = -1 +} + +// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever, +// meaning it always has lazy matching on. +func (d *compressor) deflateLazy() { + s := d.state + // Sanity enables additional runtime tests. + // It's intended to be used during development + // to supplement the currently ad-hoc unit tests. + const sanity = false + + if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { + return + } + + s.maxInsertIndex = d.windowEnd - (minMatchLength - 1) + if s.index < s.maxInsertIndex { + s.hash = hash4(d.window[s.index : s.index+minMatchLength]) + } + + for { + if sanity && s.index > d.windowEnd { + panic("index > windowEnd") + } + lookahead := d.windowEnd - s.index + if lookahead < minMatchLength+maxMatchLength { + if !d.sync { + return + } + if sanity && s.index > d.windowEnd { + panic("index > windowEnd") + } + if lookahead == 0 { + // Flush current output block if any. + if d.byteAvailable { + // There is still one pending token that needs to be flushed + d.tokens.AddLiteral(d.window[s.index-1]) + d.byteAvailable = false + } + if d.tokens.n > 0 { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + return + } + } + if s.index < s.maxInsertIndex { + // Update the hash + s.hash = hash4(d.window[s.index : s.index+minMatchLength]) + ch := s.hashHead[s.hash&hashMask] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[s.hash&hashMask] = uint32(s.index + s.hashOffset) + } + prevLength := s.length + prevOffset := s.offset + s.length = minMatchLength - 1 + s.offset = 0 + minIndex := s.index - windowSize + if minIndex < 0 { + minIndex = 0 + } + + if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy { + if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok { + s.length = newLength + s.offset = newOffset + } + } + if prevLength >= minMatchLength && s.length <= prevLength { + // There was a match at the previous step, and the current match is + // not better. Output the previous match. + d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)) + + // Insert in the hash table all strings up to the end of the match. + // index and index-1 are already inserted. If there is not enough + // lookahead, the last two strings are not inserted into the hash + // table. + var newIndex int + newIndex = s.index + prevLength - 1 + // Calculate missing hashes + end := newIndex + if end > s.maxInsertIndex { + end = s.maxInsertIndex + } + end += minMatchLength - 1 + startindex := s.index + 1 + if startindex > s.maxInsertIndex { + startindex = s.maxInsertIndex + } + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + if dstSize > 0 { + dst := s.hashMatch[:dstSize] + bulkHash4(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + s.hashPrev[di&windowMask] = s.hashHead[newH] + // Set the head of the hash chain to us. + s.hashHead[newH] = uint32(di + s.hashOffset) + } + s.hash = newH + } + + s.index = newIndex + d.byteAvailable = false + s.length = minMatchLength - 1 + if d.tokens.n == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + } else { + // Reset, if we got a match this run. + if s.length >= minMatchLength { + s.ii = 0 + } + // We have a byte waiting. Emit it. + if d.byteAvailable { + s.ii++ + d.tokens.AddLiteral(d.window[s.index-1]) + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.index++ + + // If we have a long run of no matches, skip additional bytes + // Resets when s.ii overflows after 64KB. + if s.ii > 31 { + n := int(s.ii >> 5) + for j := 0; j < n; j++ { + if s.index >= d.windowEnd-1 { + break + } + + d.tokens.AddLiteral(d.window[s.index-1]) + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.index++ + } + // Flush last byte + d.tokens.AddLiteral(d.window[s.index-1]) + d.byteAvailable = false + // s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + } + } else { + s.index++ + d.byteAvailable = true + } + } + } +} + +func (d *compressor) store() { + if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + d.windowEnd = 0 + } +} + +// fillWindow will fill the buffer with data for huffman-only compression. +// The number of bytes copied is returned. +func (d *compressor) fillBlock(b []byte) int { + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +// storeHuff will compress and store the currently added data, +// if enough has been accumulated or we at the end of the stream. +// Any error that occurred will be in d.err +func (d *compressor) storeHuff() { + if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 { + return + } + d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + d.windowEnd = 0 +} + +// storeFast will compress and store the currently added data, +// if enough has been accumulated or we at the end of the stream. +// Any error that occurred will be in d.err +func (d *compressor) storeFast() { + // We only compress if we have maxStoreBlockSize. + if d.windowEnd < len(d.window) { + if !d.sync { + return + } + // Handle extremely small sizes. + if d.windowEnd < 128 { + if d.windowEnd == 0 { + return + } + if d.windowEnd <= 32 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + } else { + d.w.writeBlockHuff(false, d.window[:d.windowEnd], true) + d.err = d.w.err + } + d.tokens.Reset() + d.windowEnd = 0 + d.fast.Reset() + return + } + } + + d.fast.Encode(&d.tokens, d.window[:d.windowEnd]) + // If we made zero matches, store the block as is. + if d.tokens.n == 0 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + // If we removed less than 1/16th, huffman compress the block. + } else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) { + d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + } else { + d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + } + d.tokens.Reset() + d.windowEnd = 0 +} + +// write will add input byte to the stream. +// Unless an error occurs all bytes will be consumed. +func (d *compressor) write(b []byte) (n int, err error) { + if d.err != nil { + return 0, d.err + } + n = len(b) + for len(b) > 0 { + d.step(d) + b = b[d.fill(d, b):] + if d.err != nil { + return 0, d.err + } + } + return n, d.err +} + +func (d *compressor) syncFlush() error { + d.sync = true + if d.err != nil { + return d.err + } + d.step(d) + if d.err == nil { + d.w.writeStoredHeader(0, false) + d.w.flush() + d.err = d.w.err + } + d.sync = false + return d.err +} + +func (d *compressor) init(w io.Writer, level int) (err error) { + d.w = newHuffmanBitWriter(w) + + switch { + case level == NoCompression: + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).store + case level == ConstantCompression: + d.w.logNewTablePenalty = 4 + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeHuff + case level == DefaultCompression: + level = 5 + fallthrough + case level >= 1 && level <= 6: + d.w.logNewTablePenalty = 6 + d.fast = newFastEnc(level) + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeFast + case 7 <= level && level <= 9: + d.w.logNewTablePenalty = 10 + d.state = &advancedState{} + d.compressionLevel = levels[level] + d.initDeflate() + d.fill = (*compressor).fillDeflate + d.step = (*compressor).deflateLazy + default: + return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) + } + d.level = level + return nil +} + +// reset the state of the compressor. +func (d *compressor) reset(w io.Writer) { + d.w.reset(w) + d.sync = false + d.err = nil + // We only need to reset a few things for Snappy. + if d.fast != nil { + d.fast.Reset() + d.windowEnd = 0 + d.tokens.Reset() + return + } + switch d.compressionLevel.chain { + case 0: + // level was NoCompression or ConstantCompresssion. + d.windowEnd = 0 + default: + s := d.state + s.chainHead = -1 + for i := range s.hashHead { + s.hashHead[i] = 0 + } + for i := range s.hashPrev { + s.hashPrev[i] = 0 + } + s.hashOffset = 1 + s.index, d.windowEnd = 0, 0 + d.blockStart, d.byteAvailable = 0, false + d.tokens.Reset() + s.length = minMatchLength - 1 + s.offset = 0 + s.hash = 0 + s.ii = 0 + s.maxInsertIndex = 0 + } +} + +func (d *compressor) close() error { + if d.err != nil { + return d.err + } + d.sync = true + d.step(d) + if d.err != nil { + return d.err + } + if d.w.writeStoredHeader(0, true); d.w.err != nil { + return d.w.err + } + d.w.flush() + d.w.reset(nil) + return d.w.err +} + +// NewWriter returns a new Writer compressing data at the given level. +// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression); +// higher levels typically run slower but compress more. +// Level 0 (NoCompression) does not attempt any compression; it only adds the +// necessary DEFLATE framing. +// Level -1 (DefaultCompression) uses the default compression level. +// Level -2 (ConstantCompression) will use Huffman compression only, giving +// a very fast compression for all types of input, but sacrificing considerable +// compression efficiency. +// +// If level is in the range [-2, 9] then the error returned will be nil. +// Otherwise the error returned will be non-nil. +func NewWriter(w io.Writer, level int) (*Writer, error) { + var dw Writer + if err := dw.d.init(w, level); err != nil { + return nil, err + } + return &dw, nil +} + +// NewWriterDict is like NewWriter but initializes the new +// Writer with a preset dictionary. The returned Writer behaves +// as if the dictionary had been written to it without producing +// any compressed output. The compressed data written to w +// can only be decompressed by a Reader initialized with the +// same dictionary. +func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { + zw, err := NewWriter(w, level) + if err != nil { + return nil, err + } + zw.d.fillWindow(dict) + zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method. + return zw, err +} + +// A Writer takes data written to it and writes the compressed +// form of that data to an underlying writer (see NewWriter). +type Writer struct { + d compressor + dict []byte +} + +// Write writes data to w, which will eventually write the +// compressed form of data to its underlying writer. +func (w *Writer) Write(data []byte) (n int, err error) { + return w.d.write(data) +} + +// Flush flushes any pending data to the underlying writer. +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. +// Flush does not return until the data has been written. +// Calling Flush when there is no pending data still causes the Writer +// to emit a sync marker of at least 4 bytes. +// If the underlying writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (w *Writer) Flush() error { + // For more about flushing: + // http://www.bolet.org/~pornin/deflate-flush.html + return w.d.syncFlush() +} + +// Close flushes and closes the writer. +func (w *Writer) Close() error { + return w.d.close() +} + +// Reset discards the writer's state and makes it equivalent to +// the result of NewWriter or NewWriterDict called with dst +// and w's level and dictionary. +func (w *Writer) Reset(dst io.Writer) { + if len(w.dict) > 0 { + // w was created with NewWriterDict + w.d.reset(dst) + if dst != nil { + w.d.fillWindow(w.dict) + } + } else { + // w was created with NewWriter + w.d.reset(dst) + } +} + +// ResetDict discards the writer's state and makes it equivalent to +// the result of NewWriter or NewWriterDict called with dst +// and w's level, but sets a specific dictionary. +func (w *Writer) ResetDict(dst io.Writer, dict []byte) { + w.dict = dict + w.d.reset(dst) + w.d.fillWindow(w.dict) +} diff --git a/vendor/github.com/klauspost/compress/flate/dict_decoder.go b/vendor/github.com/klauspost/compress/flate/dict_decoder.go new file mode 100644 index 000000000..71c75a065 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/dict_decoder.go @@ -0,0 +1,184 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// dictDecoder implements the LZ77 sliding dictionary as used in decompression. +// LZ77 decompresses data through sequences of two forms of commands: +// +// * Literal insertions: Runs of one or more symbols are inserted into the data +// stream as is. This is accomplished through the writeByte method for a +// single symbol, or combinations of writeSlice/writeMark for multiple symbols. +// Any valid stream must start with a literal insertion if no preset dictionary +// is used. +// +// * Backward copies: Runs of one or more symbols are copied from previously +// emitted data. Backward copies come as the tuple (dist, length) where dist +// determines how far back in the stream to copy from and length determines how +// many bytes to copy. Note that it is valid for the length to be greater than +// the distance. Since LZ77 uses forward copies, that situation is used to +// perform a form of run-length encoding on repeated runs of symbols. +// The writeCopy and tryWriteCopy are used to implement this command. +// +// For performance reasons, this implementation performs little to no sanity +// checks about the arguments. As such, the invariants documented for each +// method call must be respected. +type dictDecoder struct { + hist []byte // Sliding window history + + // Invariant: 0 <= rdPos <= wrPos <= len(hist) + wrPos int // Current output position in buffer + rdPos int // Have emitted hist[:rdPos] already + full bool // Has a full window length been written yet? +} + +// init initializes dictDecoder to have a sliding window dictionary of the given +// size. If a preset dict is provided, it will initialize the dictionary with +// the contents of dict. +func (dd *dictDecoder) init(size int, dict []byte) { + *dd = dictDecoder{hist: dd.hist} + + if cap(dd.hist) < size { + dd.hist = make([]byte, size) + } + dd.hist = dd.hist[:size] + + if len(dict) > len(dd.hist) { + dict = dict[len(dict)-len(dd.hist):] + } + dd.wrPos = copy(dd.hist, dict) + if dd.wrPos == len(dd.hist) { + dd.wrPos = 0 + dd.full = true + } + dd.rdPos = dd.wrPos +} + +// histSize reports the total amount of historical data in the dictionary. +func (dd *dictDecoder) histSize() int { + if dd.full { + return len(dd.hist) + } + return dd.wrPos +} + +// availRead reports the number of bytes that can be flushed by readFlush. +func (dd *dictDecoder) availRead() int { + return dd.wrPos - dd.rdPos +} + +// availWrite reports the available amount of output buffer space. +func (dd *dictDecoder) availWrite() int { + return len(dd.hist) - dd.wrPos +} + +// writeSlice returns a slice of the available buffer to write data to. +// +// This invariant will be kept: len(s) <= availWrite() +func (dd *dictDecoder) writeSlice() []byte { + return dd.hist[dd.wrPos:] +} + +// writeMark advances the writer pointer by cnt. +// +// This invariant must be kept: 0 <= cnt <= availWrite() +func (dd *dictDecoder) writeMark(cnt int) { + dd.wrPos += cnt +} + +// writeByte writes a single byte to the dictionary. +// +// This invariant must be kept: 0 < availWrite() +func (dd *dictDecoder) writeByte(c byte) { + dd.hist[dd.wrPos] = c + dd.wrPos++ +} + +// writeCopy copies a string at a given (dist, length) to the output. +// This returns the number of bytes copied and may be less than the requested +// length if the available space in the output buffer is too small. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) writeCopy(dist, length int) int { + dstBase := dd.wrPos + dstPos := dstBase + srcPos := dstPos - dist + endPos := dstPos + length + if endPos > len(dd.hist) { + endPos = len(dd.hist) + } + + // Copy non-overlapping section after destination position. + // + // This section is non-overlapping in that the copy length for this section + // is always less than or equal to the backwards distance. This can occur + // if a distance refers to data that wraps-around in the buffer. + // Thus, a backwards copy is performed here; that is, the exact bytes in + // the source prior to the copy is placed in the destination. + if srcPos < 0 { + srcPos += len(dd.hist) + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) + srcPos = 0 + } + + // Copy possibly overlapping section before destination position. + // + // This section can overlap if the copy length for this section is larger + // than the backwards distance. This is allowed by LZ77 so that repeated + // strings can be succinctly represented using (dist, length) pairs. + // Thus, a forwards copy is performed here; that is, the bytes copied is + // possibly dependent on the resulting bytes in the destination as the copy + // progresses along. This is functionally equivalent to the following: + // + // for i := 0; i < endPos-dstPos; i++ { + // dd.hist[dstPos+i] = dd.hist[srcPos+i] + // } + // dstPos = endPos + // + for dstPos < endPos { + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// tryWriteCopy tries to copy a string at a given (distance, length) to the +// output. This specialized version is optimized for short distances. +// +// This method is designed to be inlined for performance reasons. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) tryWriteCopy(dist, length int) int { + dstPos := dd.wrPos + endPos := dstPos + length + if dstPos < dist || endPos > len(dd.hist) { + return 0 + } + dstBase := dstPos + srcPos := dstPos - dist + + // Copy possibly overlapping section before destination position. +loop: + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + if dstPos < endPos { + goto loop // Avoid for-loop so that this function can be inlined + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// readFlush returns a slice of the historical buffer that is ready to be +// emitted to the user. The data returned by readFlush must be fully consumed +// before calling any other dictDecoder methods. +func (dd *dictDecoder) readFlush() []byte { + toRead := dd.hist[dd.rdPos:dd.wrPos] + dd.rdPos = dd.wrPos + if dd.wrPos == len(dd.hist) { + dd.wrPos, dd.rdPos = 0, 0 + dd.full = true + } + return toRead +} diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go new file mode 100644 index 000000000..3d2fdcd77 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -0,0 +1,255 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Modified for deflate by Klaus Post (c) 2015. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "fmt" + "math/bits" +) + +type fastEnc interface { + Encode(dst *tokens, src []byte) + Reset() +} + +func newFastEnc(level int) fastEnc { + switch level { + case 1: + return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}} + case 2: + return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}} + case 3: + return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}} + case 4: + return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}} + case 5: + return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}} + case 6: + return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}} + default: + panic("invalid level specified") + } +} + +const ( + tableBits = 16 // Bits used in the table + tableSize = 1 << tableBits // Size of the table + tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32. + baseMatchOffset = 1 // The smallest match offset + baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5 + maxMatchOffset = 1 << 15 // The largest match offset + + bTableBits = 18 // Bits used in the big tables + bTableSize = 1 << bTableBits // Size of the table + allocHistory = maxStoreBlockSize * 20 // Size to preallocate for history. + bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. +) + +const ( + prime3bytes = 506832829 + prime4bytes = 2654435761 + prime5bytes = 889523592379 + prime6bytes = 227718039650203 + prime7bytes = 58295818150454627 + prime8bytes = 0xcf1bbcdcb7a56463 +) + +func load32(b []byte, i int) uint32 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:4] + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load64(b []byte, i int) uint64 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:8] + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +func load3232(b []byte, i int32) uint32 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:4] + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load6432(b []byte, i int32) uint64 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:8] + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +func hash(u uint32) uint32 { + return (u * 0x1e35a7bd) >> tableShift +} + +type tableEntry struct { + val uint32 + offset int32 +} + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastGen struct { + hist []byte + cur int32 +} + +func (e *fastGen) addBlock(src []byte) int32 { + // check if we have space already + if len(e.hist)+len(src) > cap(e.hist) { + if cap(e.hist) == 0 { + e.hist = make([]byte, 0, allocHistory) + } else { + if cap(e.hist) < maxMatchOffset*2 { + panic("unexpected buffer size") + } + // Move down + offset := int32(len(e.hist)) - maxMatchOffset + copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + e.cur += offset + e.hist = e.hist[:maxMatchOffset] + } + } + s := int32(len(e.hist)) + e.hist = append(e.hist, src...) + return s +} + +// hash4 returns the hash of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <32. +func hash4u(u uint32, h uint8) uint32 { + return (u * prime4bytes) >> ((32 - h) & 31) +} + +type tableEntryPrev struct { + Cur tableEntry + Prev tableEntry +} + +// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <32. +func hash4x64(u uint64, h uint8) uint32 { + return (uint32(u) * prime4bytes) >> ((32 - h) & 31) +} + +// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash7(u uint64, h uint8) uint32 { + return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63)) +} + +// hash8 returns the hash of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash8(u uint64, h uint8) uint32 { + return uint32((u * prime8bytes) >> ((64 - h) & 63)) +} + +// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash6(u uint64, h uint8) uint32 { + return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63)) +} + +// matchlen will return the match length between offsets and t in src. +// The maximum length returned is maxMatchLength - 4. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastGen) matchlen(s, t int32, src []byte) int32 { + if debugDecode { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > maxMatchOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + s1 := int(s) + maxMatchLength - 4 + if s1 > len(src) { + s1 = len(src) + } + + // Extend the match to be as long as possible. + return int32(matchLen(src[s:s1], src[t:])) +} + +// matchlenLong will return the match length between offsets and t in src. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 { + if debugDecode { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > maxMatchOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + // Extend the match to be as long as possible. + return int32(matchLen(src[s:], src[t:])) +} + +// Reset the encoding table. +func (e *fastGen) Reset() { + if cap(e.hist) < allocHistory { + e.hist = make([]byte, 0, allocHistory) + } + // We offset current position so everything will be out of reach. + // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. + if e.cur <= bufferReset { + e.cur += maxMatchOffset + int32(len(e.hist)) + } + e.hist = e.hist[:0] +} + +// matchLen returns the maximum length. +// 'a' must be the shortest of the two. +func matchLen(a, b []byte) int { + b = b[:len(a)] + var checked int + if len(a) > 4 { + // Try 4 bytes first + if diff := load32(a, 0) ^ load32(b, 0); diff != 0 { + return bits.TrailingZeros32(diff) >> 3 + } + // Switch to 8 byte matching. + checked = 4 + a = a[4:] + b = b[4:] + for len(a) >= 8 { + b = b[:len(a)] + if diff := load64(a, 0) ^ load64(b, 0); diff != 0 { + return checked + (bits.TrailingZeros64(diff) >> 3) + } + checked += 8 + a = a[8:] + b = b[8:] + } + } + b = b[:len(a)] + for i := range a { + if a[i] != b[i] { + return int(i) + checked + } + } + return len(a) + checked +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go new file mode 100644 index 000000000..56ee6dc8b --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go @@ -0,0 +1,898 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "io" +) + +const ( + // The largest offset code. + offsetCodeCount = 30 + + // The special code used to mark the end of a block. + endBlockMarker = 256 + + // The first length code. + lengthCodesStart = 257 + + // The number of codegen codes. + codegenCodeCount = 19 + badCode = 255 + + // bufferFlushSize indicates the buffer size + // after which bytes are flushed to the writer. + // Should preferably be a multiple of 6, since + // we accumulate 6 bytes between writes to the buffer. + bufferFlushSize = 240 + + // bufferSize is the actual output byte buffer size. + // It must have additional headroom for a flush + // which can contain up to 8 bytes. + bufferSize = bufferFlushSize + 8 +) + +// The number of extra bits needed by length code X - LENGTH_CODES_START. +var lengthExtraBits = [32]int8{ + /* 257 */ 0, 0, 0, + /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, + /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, + /* 280 */ 4, 5, 5, 5, 5, 0, +} + +// The length indicated by length code X - LENGTH_CODES_START. +var lengthBase = [32]uint8{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, + 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, + 64, 80, 96, 112, 128, 160, 192, 224, 255, +} + +// offset code word extra bits. +var offsetExtraBits = [64]int8{ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + /* extended window */ + 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, +} + +var offsetBase = [64]uint32{ + /* normal deflate */ + 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, + 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, + 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, + 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, + 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, + 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, + + /* extended window */ + 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000, + 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000, + 0x100000, 0x180000, 0x200000, 0x300000, +} + +// The odd order in which the codegen code sizes are written. +var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +type huffmanBitWriter struct { + // writer is the underlying writer. + // Do not use it directly; use the write method, which ensures + // that Write errors are sticky. + writer io.Writer + + // Data waiting to be written is bytes[0:nbytes] + // and then the low nbits of bits. + bits uint64 + nbits uint16 + nbytes uint8 + literalEncoding *huffmanEncoder + offsetEncoding *huffmanEncoder + codegenEncoding *huffmanEncoder + err error + lastHeader int + // Set between 0 (reused block can be up to 2x the size) + logNewTablePenalty uint + lastHuffMan bool + bytes [256]byte + literalFreq [lengthCodesStart + 32]uint16 + offsetFreq [32]uint16 + codegenFreq [codegenCodeCount]uint16 + + // codegen must have an extra space for the final symbol. + codegen [literalCount + offsetCodeCount + 1]uint8 +} + +// Huffman reuse. +// +// The huffmanBitWriter supports reusing huffman tables and thereby combining block sections. +// +// This is controlled by several variables: +// +// If lastHeader is non-zero the Huffman table can be reused. +// This also indicates that a Huffman table has been generated that can output all +// possible symbols. +// It also indicates that an EOB has not yet been emitted, so if a new tabel is generated +// an EOB with the previous table must be written. +// +// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid. +// +// An incoming block estimates the output size of a new table using a 'fresh' by calculating the +// optimal size and adding a penalty in 'logNewTablePenalty'. +// A Huffman table is not optimal, which is why we add a penalty, and generating a new table +// is slower both for compression and decompression. + +func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { + return &huffmanBitWriter{ + writer: w, + literalEncoding: newHuffmanEncoder(literalCount), + codegenEncoding: newHuffmanEncoder(codegenCodeCount), + offsetEncoding: newHuffmanEncoder(offsetCodeCount), + } +} + +func (w *huffmanBitWriter) reset(writer io.Writer) { + w.writer = writer + w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil + w.lastHeader = 0 + w.lastHuffMan = false +} + +func (w *huffmanBitWriter) canReuse(t *tokens) (offsets, lits bool) { + offsets, lits = true, true + a := t.offHist[:offsetCodeCount] + b := w.offsetFreq[:len(a)] + for i := range a { + if b[i] == 0 && a[i] != 0 { + offsets = false + break + } + } + + a = t.extraHist[:literalCount-256] + b = w.literalFreq[256:literalCount] + b = b[:len(a)] + for i := range a { + if b[i] == 0 && a[i] != 0 { + lits = false + break + } + } + if lits { + a = t.litHist[:] + b = w.literalFreq[:len(a)] + for i := range a { + if b[i] == 0 && a[i] != 0 { + lits = false + break + } + } + } + return +} + +func (w *huffmanBitWriter) flush() { + if w.err != nil { + w.nbits = 0 + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + n := w.nbytes + for w.nbits != 0 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + if w.nbits > 8 { // Avoid underflow + w.nbits -= 8 + } else { + w.nbits = 0 + } + n++ + } + w.bits = 0 + w.write(w.bytes[:n]) + w.nbytes = 0 +} + +func (w *huffmanBitWriter) write(b []byte) { + if w.err != nil { + return + } + _, w.err = w.writer.Write(b) +} + +func (w *huffmanBitWriter) writeBits(b int32, nb uint16) { + w.bits |= uint64(b) << (w.nbits & 63) + w.nbits += nb + if w.nbits >= 48 { + w.writeOutBits() + } +} + +func (w *huffmanBitWriter) writeBytes(bytes []byte) { + if w.err != nil { + return + } + n := w.nbytes + if w.nbits&7 != 0 { + w.err = InternalError("writeBytes with unfinished bits") + return + } + for w.nbits != 0 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + w.nbits -= 8 + n++ + } + if n != 0 { + w.write(w.bytes[:n]) + } + w.nbytes = 0 + w.write(bytes) +} + +// RFC 1951 3.2.7 specifies a special run-length encoding for specifying +// the literal and offset lengths arrays (which are concatenated into a single +// array). This method generates that run-length encoding. +// +// The result is written into the codegen array, and the frequencies +// of each code is written into the codegenFreq array. +// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional +// information. Code badCode is an end marker +// +// numLiterals The number of literals in literalEncoding +// numOffsets The number of offsets in offsetEncoding +// litenc, offenc The literal and offset encoder to use +func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) { + for i := range w.codegenFreq { + w.codegenFreq[i] = 0 + } + // Note that we are using codegen both as a temporary variable for holding + // a copy of the frequencies, and as the place where we put the result. + // This is fine because the output is always shorter than the input used + // so far. + codegen := w.codegen[:] // cache + // Copy the concatenated code sizes to codegen. Put a marker at the end. + cgnl := codegen[:numLiterals] + for i := range cgnl { + cgnl[i] = uint8(litEnc.codes[i].len) + } + + cgnl = codegen[numLiterals : numLiterals+numOffsets] + for i := range cgnl { + cgnl[i] = uint8(offEnc.codes[i].len) + } + codegen[numLiterals+numOffsets] = badCode + + size := codegen[0] + count := 1 + outIndex := 0 + for inIndex := 1; size != badCode; inIndex++ { + // INVARIANT: We have seen "count" copies of size that have not yet + // had output generated for them. + nextSize := codegen[inIndex] + if nextSize == size { + count++ + continue + } + // We need to generate codegen indicating "count" of size. + if size != 0 { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + count-- + for count >= 3 { + n := 6 + if n > count { + n = count + } + codegen[outIndex] = 16 + outIndex++ + codegen[outIndex] = uint8(n - 3) + outIndex++ + w.codegenFreq[16]++ + count -= n + } + } else { + for count >= 11 { + n := 138 + if n > count { + n = count + } + codegen[outIndex] = 18 + outIndex++ + codegen[outIndex] = uint8(n - 11) + outIndex++ + w.codegenFreq[18]++ + count -= n + } + if count >= 3 { + // count >= 3 && count <= 10 + codegen[outIndex] = 17 + outIndex++ + codegen[outIndex] = uint8(count - 3) + outIndex++ + w.codegenFreq[17]++ + count = 0 + } + } + count-- + for ; count >= 0; count-- { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + } + // Set up invariant for next time through the loop. + size = nextSize + count = 1 + } + // Marker indicating the end of the codegen. + codegen[outIndex] = badCode +} + +func (w *huffmanBitWriter) codegens() int { + numCodegens := len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + return numCodegens +} + +func (w *huffmanBitWriter) headerSize() (size, numCodegens int) { + numCodegens = len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + return 3 + 5 + 5 + 4 + (3 * numCodegens) + + w.codegenEncoding.bitLength(w.codegenFreq[:]) + + int(w.codegenFreq[16])*2 + + int(w.codegenFreq[17])*3 + + int(w.codegenFreq[18])*7, numCodegens +} + +// dynamicSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) { + size = litEnc.bitLength(w.literalFreq[:]) + + offEnc.bitLength(w.offsetFreq[:]) + return size +} + +// dynamicSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) { + header, numCodegens := w.headerSize() + size = header + + litEnc.bitLength(w.literalFreq[:]) + + offEnc.bitLength(w.offsetFreq[:]) + + extraBits + return size, numCodegens +} + +// extraBitSize will return the number of bits that will be written +// as "extra" bits on matches. +func (w *huffmanBitWriter) extraBitSize() int { + total := 0 + for i, n := range w.literalFreq[257:literalCount] { + total += int(n) * int(lengthExtraBits[i&31]) + } + for i, n := range w.offsetFreq[:offsetCodeCount] { + total += int(n) * int(offsetExtraBits[i&31]) + } + return total +} + +// fixedSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) fixedSize(extraBits int) int { + return 3 + + fixedLiteralEncoding.bitLength(w.literalFreq[:]) + + fixedOffsetEncoding.bitLength(w.offsetFreq[:]) + + extraBits +} + +// storedSize calculates the stored size, including header. +// The function returns the size in bits and whether the block +// fits inside a single block. +func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) { + if in == nil { + return 0, false + } + if len(in) <= maxStoreBlockSize { + return (len(in) + 5) * 8, true + } + return 0, false +} + +func (w *huffmanBitWriter) writeCode(c hcode) { + // The function does not get inlined if we "& 63" the shift. + w.bits |= uint64(c.code) << w.nbits + w.nbits += c.len + if w.nbits >= 48 { + w.writeOutBits() + } +} + +// writeOutBits will write bits to the buffer. +func (w *huffmanBitWriter) writeOutBits() { + bits := w.bits + w.bits >>= 48 + w.nbits -= 48 + n := w.nbytes + w.bytes[n] = byte(bits) + w.bytes[n+1] = byte(bits >> 8) + w.bytes[n+2] = byte(bits >> 16) + w.bytes[n+3] = byte(bits >> 24) + w.bytes[n+4] = byte(bits >> 32) + w.bytes[n+5] = byte(bits >> 40) + n += 6 + if n >= bufferFlushSize { + if w.err != nil { + n = 0 + return + } + w.write(w.bytes[:n]) + n = 0 + } + w.nbytes = n +} + +// Write the header of a dynamic Huffman block to the output stream. +// +// numLiterals The number of literals specified in codegen +// numOffsets The number of offsets specified in codegen +// numCodegens The number of codegens used in codegen +func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { + if w.err != nil { + return + } + var firstBits int32 = 4 + if isEof { + firstBits = 5 + } + w.writeBits(firstBits, 3) + w.writeBits(int32(numLiterals-257), 5) + w.writeBits(int32(numOffsets-1), 5) + w.writeBits(int32(numCodegens-4), 4) + + for i := 0; i < numCodegens; i++ { + value := uint(w.codegenEncoding.codes[codegenOrder[i]].len) + w.writeBits(int32(value), 3) + } + + i := 0 + for { + var codeWord = uint32(w.codegen[i]) + i++ + if codeWord == badCode { + break + } + w.writeCode(w.codegenEncoding.codes[codeWord]) + + switch codeWord { + case 16: + w.writeBits(int32(w.codegen[i]), 2) + i++ + case 17: + w.writeBits(int32(w.codegen[i]), 3) + i++ + case 18: + w.writeBits(int32(w.codegen[i]), 7) + i++ + } + } +} + +func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { + if w.err != nil { + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + var flag int32 + if isEof { + flag = 1 + } + w.writeBits(flag, 3) + w.flush() + w.writeBits(int32(length), 16) + w.writeBits(int32(^uint16(length)), 16) +} + +func (w *huffmanBitWriter) writeFixedHeader(isEof bool) { + if w.err != nil { + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + + // Indicate that we are a fixed Huffman block + var value int32 = 2 + if isEof { + value = 3 + } + w.writeBits(value, 3) +} + +// writeBlock will write a block of tokens with the smallest encoding. +// The original input can be supplied, and if the huffman encoded data +// is larger than the original bytes, the data will be written as a +// stored block. +// If the input is nil, the tokens will always be Huffman encoded. +func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) { + if w.err != nil { + return + } + + tokens.AddEOB() + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + numLiterals, numOffsets := w.indexTokens(tokens, false) + w.generate(tokens) + var extraBits int + storedSize, storable := w.storedSize(input) + if storable { + extraBits = w.extraBitSize() + } + + // Figure out smallest code. + // Fixed Huffman baseline. + var literalEncoding = fixedLiteralEncoding + var offsetEncoding = fixedOffsetEncoding + var size = w.fixedSize(extraBits) + + // Dynamic Huffman? + var numCodegens int + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) + + if dynamicSize < size { + size = dynamicSize + literalEncoding = w.literalEncoding + offsetEncoding = w.offsetEncoding + } + + // Stored bytes? + if storable && storedSize < size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + // Huffman. + if literalEncoding == fixedLiteralEncoding { + w.writeFixedHeader(eof) + } else { + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + } + + // Write the tokens. + w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes) +} + +// writeBlockDynamic encodes a block using a dynamic Huffman table. +// This should be used if the symbols used have a disproportionate +// histogram distribution. +// If input is supplied and the compression savings are below 1/16th of the +// input size the block is stored. +func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) { + if w.err != nil { + return + } + + sync = sync || eof + if sync { + tokens.AddEOB() + } + + // We cannot reuse pure huffman table, and must mark as EOF. + if (w.lastHuffMan || eof) && w.lastHeader > 0 { + // We will not try to reuse. + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + w.lastHuffMan = false + } + if !sync { + tokens.Fill() + } + numLiterals, numOffsets := w.indexTokens(tokens, !sync) + + var size int + // Check if we should reuse. + if w.lastHeader > 0 { + // Estimate size for using a new table. + // Use the previous header size as the best estimate. + newSize := w.lastHeader + tokens.EstimatedBits() + newSize += newSize >> w.logNewTablePenalty + + // The estimated size is calculated as an optimal table. + // We add a penalty to make it more realistic and re-use a bit more. + reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + w.extraBitSize() + + // Check if a new table is better. + if newSize < reuseSize { + // Write the EOB we owe. + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + size = newSize + w.lastHeader = 0 + } else { + size = reuseSize + } + // Check if we get a reasonable size decrease. + if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + w.lastHeader = 0 + return + } + } + + // We want a new block/table + if w.lastHeader == 0 { + w.generate(tokens) + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + var numCodegens int + size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, w.extraBitSize()) + // Store bytes, if we don't get a reasonable improvement. + if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + w.lastHeader = 0 + return + } + + // Write Huffman table. + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + w.lastHeader, _ = w.headerSize() + w.lastHuffMan = false + } + + if sync { + w.lastHeader = 0 + } + // Write the tokens. + w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes) +} + +// indexTokens indexes a slice of tokens, and updates +// literalFreq and offsetFreq, and generates literalEncoding +// and offsetEncoding. +// The number of literal and offset tokens is returned. +func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) { + copy(w.literalFreq[:], t.litHist[:]) + copy(w.literalFreq[256:], t.extraHist[:]) + copy(w.offsetFreq[:], t.offHist[:offsetCodeCount]) + + if t.n == 0 { + return + } + if filled { + return maxNumLit, maxNumDist + } + // get the number of literals + numLiterals = len(w.literalFreq) + for w.literalFreq[numLiterals-1] == 0 { + numLiterals-- + } + // get the number of offsets + numOffsets = len(w.offsetFreq) + for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 { + numOffsets-- + } + if numOffsets == 0 { + // We haven't found a single match. If we want to go with the dynamic encoding, + // we should count at least one offset to be sure that the offset huffman tree could be encoded. + w.offsetFreq[0] = 1 + numOffsets = 1 + } + return +} + +func (w *huffmanBitWriter) generate(t *tokens) { + w.literalEncoding.generate(w.literalFreq[:literalCount], 15) + w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15) +} + +// writeTokens writes a slice of tokens to the output. +// codes for literal and offset encoding must be supplied. +func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) { + if w.err != nil { + return + } + if len(tokens) == 0 { + return + } + + // Only last token should be endBlockMarker. + var deferEOB bool + if tokens[len(tokens)-1] == endBlockMarker { + tokens = tokens[:len(tokens)-1] + deferEOB = true + } + + // Create slices up to the next power of two to avoid bounds checks. + lits := leCodes[:256] + offs := oeCodes[:32] + lengths := leCodes[lengthCodesStart:] + lengths = lengths[:32] + for _, t := range tokens { + if t < matchType { + w.writeCode(lits[t.literal()]) + continue + } + + // Write the length + length := t.length() + lengthCode := lengthCode(length) + if false { + w.writeCode(lengths[lengthCode&31]) + } else { + // inlined + c := lengths[lengthCode&31] + w.bits |= uint64(c.code) << (w.nbits & 63) + w.nbits += c.len + if w.nbits >= 48 { + w.writeOutBits() + } + } + + extraLengthBits := uint16(lengthExtraBits[lengthCode&31]) + if extraLengthBits > 0 { + extraLength := int32(length - lengthBase[lengthCode&31]) + w.writeBits(extraLength, extraLengthBits) + } + // Write the offset + offset := t.offset() + offsetCode := offsetCode(offset) + if false { + w.writeCode(offs[offsetCode&31]) + } else { + // inlined + c := offs[offsetCode&31] + w.bits |= uint64(c.code) << (w.nbits & 63) + w.nbits += c.len + if w.nbits >= 48 { + w.writeOutBits() + } + } + extraOffsetBits := uint16(offsetExtraBits[offsetCode&63]) + if extraOffsetBits > 0 { + extraOffset := int32(offset - offsetBase[offsetCode&63]) + w.writeBits(extraOffset, extraOffsetBits) + } + } + if deferEOB { + w.writeCode(leCodes[endBlockMarker]) + } +} + +// huffOffset is a static offset encoder used for huffman only encoding. +// It can be reused since we will not be encoding offset values. +var huffOffset *huffmanEncoder + +func init() { + w := newHuffmanBitWriter(nil) + w.offsetFreq[0] = 1 + huffOffset = newHuffmanEncoder(offsetCodeCount) + huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15) +} + +// writeBlockHuff encodes a block of bytes as either +// Huffman encoded literals or uncompressed bytes if the +// results only gains very little from compression. +func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { + if w.err != nil { + return + } + + // Clear histogram + for i := range w.literalFreq[:] { + w.literalFreq[i] = 0 + } + if !w.lastHuffMan { + for i := range w.offsetFreq[:] { + w.offsetFreq[i] = 0 + } + } + + // Add everything as literals + // We have to estimate the header size. + // Assume header is around 70 bytes: + // https://stackoverflow.com/a/25454430 + const guessHeaderSizeBits = 70 * 8 + estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync) + estBits += w.lastHeader + 15 + if w.lastHeader == 0 { + estBits += guessHeaderSizeBits + } + estBits += estBits >> w.logNewTablePenalty + + // Store bytes, if we don't get a reasonable improvement. + ssize, storable := w.storedSize(input) + if storable && ssize < estBits { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + if w.lastHeader > 0 { + reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256]) + estBits += estExtra + + if estBits < reuseSize { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + } + + const numLiterals = endBlockMarker + 1 + const numOffsets = 1 + if w.lastHeader == 0 { + w.literalFreq[endBlockMarker] = 1 + w.literalEncoding.generate(w.literalFreq[:numLiterals], 15) + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + numCodegens := w.codegens() + + // Huffman. + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + w.lastHuffMan = true + w.lastHeader, _ = w.headerSize() + } + + encoding := w.literalEncoding.codes[:257] + for _, t := range input { + // Bitwriting inlined, ~30% speedup + c := encoding[t] + w.bits |= uint64(c.code) << ((w.nbits) & 63) + w.nbits += c.len + if w.nbits >= 48 { + bits := w.bits + w.bits >>= 48 + w.nbits -= 48 + n := w.nbytes + w.bytes[n] = byte(bits) + w.bytes[n+1] = byte(bits >> 8) + w.bytes[n+2] = byte(bits >> 16) + w.bytes[n+3] = byte(bits >> 24) + w.bytes[n+4] = byte(bits >> 32) + w.bytes[n+5] = byte(bits >> 40) + n += 6 + if n >= bufferFlushSize { + if w.err != nil { + n = 0 + return + } + w.write(w.bytes[:n]) + n = 0 + } + w.nbytes = n + } + } + if eof || sync { + w.writeCode(encoding[endBlockMarker]) + w.lastHeader = 0 + w.lastHuffMan = false + } +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go new file mode 100644 index 000000000..9d8e81ad6 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go @@ -0,0 +1,363 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "math" + "math/bits" +) + +const ( + maxBitsLimit = 16 + // number of valid literals + literalCount = 286 +) + +// hcode is a huffman code with a bit code and bit length. +type hcode struct { + code, len uint16 +} + +type huffmanEncoder struct { + codes []hcode + freqcache []literalNode + bitCount [17]int32 +} + +type literalNode struct { + literal uint16 + freq uint16 +} + +// A levelInfo describes the state of the constructed tree for a given depth. +type levelInfo struct { + // Our level. for better printing + level int32 + + // The frequency of the last node at this level + lastFreq int32 + + // The frequency of the next character to add to this level + nextCharFreq int32 + + // The frequency of the next pair (from level below) to add to this level. + // Only valid if the "needed" value of the next lower level is 0. + nextPairFreq int32 + + // The number of chains remaining to generate for this level before moving + // up to the next level + needed int32 +} + +// set sets the code and length of an hcode. +func (h *hcode) set(code uint16, length uint16) { + h.len = length + h.code = code +} + +func reverseBits(number uint16, bitLength byte) uint16 { + return bits.Reverse16(number << ((16 - bitLength) & 15)) +} + +func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} } + +func newHuffmanEncoder(size int) *huffmanEncoder { + // Make capacity to next power of two. + c := uint(bits.Len32(uint32(size - 1))) + return &huffmanEncoder{codes: make([]hcode, size, 1<= 3 +// The cases of 0, 1, and 2 literals are handled by special case code. +// +// list An array of the literals with non-zero frequencies +// and their associated frequencies. The array is in order of increasing +// frequency, and has as its last element a special element with frequency +// MaxInt32 +// maxBits The maximum number of bits that should be used to encode any literal. +// Must be less than 16. +// return An integer array in which array[i] indicates the number of literals +// that should be encoded in i bits. +func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { + if maxBits >= maxBitsLimit { + panic("flate: maxBits too large") + } + n := int32(len(list)) + list = list[0 : n+1] + list[n] = maxNode() + + // The tree can't have greater depth than n - 1, no matter what. This + // saves a little bit of work in some small cases + if maxBits > n-1 { + maxBits = n - 1 + } + + // Create information about each of the levels. + // A bogus "Level 0" whose sole purpose is so that + // level1.prev.needed==0. This makes level1.nextPairFreq + // be a legitimate value that never gets chosen. + var levels [maxBitsLimit]levelInfo + // leafCounts[i] counts the number of literals at the left + // of ancestors of the rightmost node at level i. + // leafCounts[i][j] is the number of literals at the left + // of the level j ancestor. + var leafCounts [maxBitsLimit][maxBitsLimit]int32 + + for level := int32(1); level <= maxBits; level++ { + // For every level, the first two items are the first two characters. + // We initialize the levels as if we had already figured this out. + levels[level] = levelInfo{ + level: level, + lastFreq: int32(list[1].freq), + nextCharFreq: int32(list[2].freq), + nextPairFreq: int32(list[0].freq) + int32(list[1].freq), + } + leafCounts[level][level] = 2 + if level == 1 { + levels[level].nextPairFreq = math.MaxInt32 + } + } + + // We need a total of 2*n - 2 items at top level and have already generated 2. + levels[maxBits].needed = 2*n - 4 + + level := maxBits + for { + l := &levels[level] + if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { + // We've run out of both leafs and pairs. + // End all calculations for this level. + // To make sure we never come back to this level or any lower level, + // set nextPairFreq impossibly large. + l.needed = 0 + levels[level+1].nextPairFreq = math.MaxInt32 + level++ + continue + } + + prevFreq := l.lastFreq + if l.nextCharFreq < l.nextPairFreq { + // The next item on this row is a leaf node. + n := leafCounts[level][level] + 1 + l.lastFreq = l.nextCharFreq + // Lower leafCounts are the same of the previous node. + leafCounts[level][level] = n + e := list[n] + if e.literal < math.MaxUint16 { + l.nextCharFreq = int32(e.freq) + } else { + l.nextCharFreq = math.MaxInt32 + } + } else { + // The next item on this row is a pair from the previous row. + // nextPairFreq isn't valid until we generate two + // more values in the level below + l.lastFreq = l.nextPairFreq + // Take leaf counts from the lower level, except counts[level] remains the same. + copy(leafCounts[level][:level], leafCounts[level-1][:level]) + levels[l.level-1].needed = 2 + } + + if l.needed--; l.needed == 0 { + // We've done everything we need to do for this level. + // Continue calculating one level up. Fill in nextPairFreq + // of that level with the sum of the two nodes we've just calculated on + // this level. + if l.level == maxBits { + // All done! + break + } + levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq + level++ + } else { + // If we stole from below, move down temporarily to replenish it. + for levels[level-1].needed > 0 { + level-- + } + } + } + + // Somethings is wrong if at the end, the top level is null or hasn't used + // all of the leaves. + if leafCounts[maxBits][maxBits] != n { + panic("leafCounts[maxBits][maxBits] != n") + } + + bitCount := h.bitCount[:maxBits+1] + bits := 1 + counts := &leafCounts[maxBits] + for level := maxBits; level > 0; level-- { + // chain.leafCount gives the number of literals requiring at least "bits" + // bits to encode. + bitCount[bits] = counts[level] - counts[level-1] + bits++ + } + return bitCount +} + +// Look at the leaves and assign them a bit count and an encoding as specified +// in RFC 1951 3.2.2 +func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { + code := uint16(0) + for n, bits := range bitCount { + code <<= 1 + if n == 0 || bits == 0 { + continue + } + // The literals list[len(list)-bits] .. list[len(list)-bits] + // are encoded using "bits" bits, and get the values + // code, code + 1, .... The code values are + // assigned in literal order (not frequency order). + chunk := list[len(list)-int(bits):] + + sortByLiteral(chunk) + for _, node := range chunk { + h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)} + code++ + } + list = list[0 : len(list)-int(bits)] + } +} + +// Update this Huffman Code object to be the minimum code for the specified frequency count. +// +// freq An array of frequencies, in which frequency[i] gives the frequency of literal i. +// maxBits The maximum number of bits to use for any literal. +func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { + if h.freqcache == nil { + // Allocate a reusable buffer with the longest possible frequency table. + // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. + // The largest of these is literalCount, so we allocate for that case. + h.freqcache = make([]literalNode, literalCount+1) + } + list := h.freqcache[:len(freq)+1] + // Number of non-zero literals + count := 0 + // Set list to be the set of all non-zero literals and their frequencies + for i, f := range freq { + if f != 0 { + list[count] = literalNode{uint16(i), f} + count++ + } else { + list[count] = literalNode{} + h.codes[i].len = 0 + } + } + list[len(freq)] = literalNode{} + + list = list[:count] + if count <= 2 { + // Handle the small cases here, because they are awkward for the general case code. With + // two or fewer literals, everything has bit length 1. + for i, node := range list { + // "list" is in order of increasing literal value. + h.codes[node.literal].set(uint16(i), 1) + } + return + } + sortByFreq(list) + + // Get the number of literals for each bit count + bitCount := h.bitCounts(list, maxBits) + // And do the assignment + h.assignEncodingAndSize(bitCount, list) +} + +func atLeastOne(v float32) float32 { + if v < 1 { + return 1 + } + return v +} + +// histogramSize accumulates a histogram of b in h. +// An estimated size in bits is returned. +// Unassigned values are assigned '1' in the histogram. +// len(h) must be >= 256, and h's elements must be all zeroes. +func histogramSize(b []byte, h []uint16, fill bool) (int, int) { + h = h[:256] + for _, t := range b { + h[t]++ + } + invTotal := 1.0 / float32(len(b)) + shannon := float32(0.0) + var extra float32 + if fill { + oneBits := atLeastOne(-mFastLog2(invTotal)) + for i, v := range h[:] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + } else { + h[i] = 1 + extra += oneBits + } + } + } else { + for _, v := range h[:] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + } + } + } + + return int(shannon + 0.99), int(extra + 0.99) +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go new file mode 100644 index 000000000..207780299 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go @@ -0,0 +1,178 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByFreq(data []literalNode) { + n := len(data) + quickSortByFreq(data, 0, n, maxDepth(n)) +} + +func quickSortByFreq(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivotByFreq(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSortByFreq(data, a, mlo, maxDepth) + a = mhi // i.e., quickSortByFreq(data, mhi, b) + } else { + quickSortByFreq(data, mhi, b, maxDepth) + b = mlo // i.e., quickSortByFreq(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSortByFreq(data, a, b) + } +} + +// siftDownByFreq implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDownByFreq(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) { + child++ + } + if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s) + medianOfThreeSortByFreq(data, m, m-s, m+s) + medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThreeSortByFreq(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { + } + b := a + for { + for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot + } + for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot + } + for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSortByFreq(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// quickSortByFreq, loosely following Bentley and McIlroy, +// ``Engineering a Sort Function,'' SP&E November 1993. + +// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go new file mode 100644 index 000000000..93f1aea10 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go @@ -0,0 +1,201 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByLiteral(data []literalNode) { + n := len(data) + quickSort(data, 0, n, maxDepth(n)) +} + +func quickSort(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivot(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSort(data, a, mlo, maxDepth) + a = mhi // i.e., quickSort(data, mhi, b) + } else { + quickSort(data, mhi, b, maxDepth) + b = mlo // i.e., quickSort(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].literal < data[i-6].literal { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSort(data, a, b) + } +} +func heapSort(data []literalNode, a, b int) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown(data, i, hi, first) + } + + // Pop elements, largest first, into end of data. + for i := hi - 1; i >= 0; i-- { + data[first], data[first+i] = data[first+i], data[first] + siftDown(data, lo, i, first) + } +} + +// siftDown implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDown(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && data[first+child].literal < data[first+child+1].literal { + child++ + } + if data[first+root].literal > data[first+child].literal { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThree(data, lo, lo+s, lo+2*s) + medianOfThree(data, m, m-s, m+s) + medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThree(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && data[a].literal < data[pivot].literal; a++ { + } + b := a + for { + for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot + } + for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].literal > data[pivot].literal { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot + } + for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSort(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && data[j].literal < data[j-1].literal; j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// maxDepth returns a threshold at which quicksort should switch +// to heapsort. It returns 2*ceil(lg(n+1)). +func maxDepth(n int) int { + var depth int + for i := n; i > 0; i >>= 1 { + depth++ + } + return depth * 2 +} + +// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThree(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].literal < data[m1].literal { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go new file mode 100644 index 000000000..6dc5b5d06 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -0,0 +1,937 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package flate implements the DEFLATE compressed data format, described in +// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file +// formats. +package flate + +import ( + "bufio" + "fmt" + "io" + "math/bits" + "strconv" + "sync" +) + +const ( + maxCodeLen = 16 // max length of Huffman code + maxCodeLenMask = 15 // mask for max length of Huffman code + // The next three numbers come from the RFC section 3.2.7, with the + // additional proviso in section 3.2.5 which implies that distance codes + // 30 and 31 should never occur in compressed data. + maxNumLit = 286 + maxNumDist = 30 + numCodes = 19 // number of codes in Huffman meta-code + + debugDecode = false +) + +// Initialize the fixedHuffmanDecoder only once upon first use. +var fixedOnce sync.Once +var fixedHuffmanDecoder huffmanDecoder + +// A CorruptInputError reports the presence of corrupt input at a given offset. +type CorruptInputError int64 + +func (e CorruptInputError) Error() string { + return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) +} + +// An InternalError reports an error in the flate code itself. +type InternalError string + +func (e InternalError) Error() string { return "flate: internal error: " + string(e) } + +// A ReadError reports an error encountered while reading input. +// +// Deprecated: No longer returned. +type ReadError struct { + Offset int64 // byte offset where error occurred + Err error // error returned by underlying Read +} + +func (e *ReadError) Error() string { + return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() +} + +// A WriteError reports an error encountered while writing output. +// +// Deprecated: No longer returned. +type WriteError struct { + Offset int64 // byte offset where error occurred + Err error // error returned by underlying Write +} + +func (e *WriteError) Error() string { + return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() +} + +// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to +// to switch to a new underlying Reader. This permits reusing a ReadCloser +// instead of allocating a new one. +type Resetter interface { + // Reset discards any buffered data and resets the Resetter as if it was + // newly initialized with the given reader. + Reset(r io.Reader, dict []byte) error +} + +// The data structure for decoding Huffman tables is based on that of +// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), +// For codes smaller than the table width, there are multiple entries +// (each combination of trailing bits has the same value). For codes +// larger than the table width, the table contains a link to an overflow +// table. The width of each entry in the link table is the maximum code +// size minus the chunk width. +// +// Note that you can do a lookup in the table even without all bits +// filled. Since the extra bits are zero, and the DEFLATE Huffman codes +// have the property that shorter codes come before longer ones, the +// bit length estimate in the result is a lower bound on the actual +// number of bits. +// +// See the following: +// http://www.gzip.org/algorithm.txt + +// chunk & 15 is number of bits +// chunk >> 4 is value, including table link + +const ( + huffmanChunkBits = 9 + huffmanNumChunks = 1 << huffmanChunkBits + huffmanCountMask = 15 + huffmanValueShift = 4 +) + +type huffmanDecoder struct { + min int // the minimum code length + chunks *[huffmanNumChunks]uint16 // chunks as described above + links [][]uint16 // overflow links + linkMask uint32 // mask the width of the link table +} + +// Initialize Huffman decoding tables from array of code lengths. +// Following this function, h is guaranteed to be initialized into a complete +// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a +// degenerate case where the tree has only a single symbol with length 1. Empty +// trees are permitted. +func (h *huffmanDecoder) init(lengths []int) bool { + // Sanity enables additional runtime tests during Huffman + // table construction. It's intended to be used during + // development to supplement the currently ad-hoc unit tests. + const sanity = false + + if h.chunks == nil { + h.chunks = &[huffmanNumChunks]uint16{} + } + if h.min != 0 { + *h = huffmanDecoder{chunks: h.chunks, links: h.links} + } + + // Count number of codes of each length, + // compute min and max length. + var count [maxCodeLen]int + var min, max int + for _, n := range lengths { + if n == 0 { + continue + } + if min == 0 || n < min { + min = n + } + if n > max { + max = n + } + count[n&maxCodeLenMask]++ + } + + // Empty tree. The decompressor.huffSym function will fail later if the tree + // is used. Technically, an empty tree is only valid for the HDIST tree and + // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree + // is guaranteed to fail since it will attempt to use the tree to decode the + // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is + // guaranteed to fail later since the compressed data section must be + // composed of at least one symbol (the end-of-block marker). + if max == 0 { + return true + } + + code := 0 + var nextcode [maxCodeLen]int + for i := min; i <= max; i++ { + code <<= 1 + nextcode[i&maxCodeLenMask] = code + code += count[i&maxCodeLenMask] + } + + // Check that the coding is complete (i.e., that we've + // assigned all 2-to-the-max possible bit sequences). + // Exception: To be compatible with zlib, we also need to + // accept degenerate single-code codings. See also + // TestDegenerateHuffmanCoding. + if code != 1< huffmanChunkBits { + numLinks := 1 << (uint(max) - huffmanChunkBits) + h.linkMask = uint32(numLinks - 1) + + // create link tables + link := nextcode[huffmanChunkBits+1] >> 1 + if cap(h.links) < huffmanNumChunks-link { + h.links = make([][]uint16, huffmanNumChunks-link) + } else { + h.links = h.links[:huffmanNumChunks-link] + } + for j := uint(link); j < huffmanNumChunks; j++ { + reverse := int(bits.Reverse16(uint16(j))) + reverse >>= uint(16 - huffmanChunkBits) + off := j - uint(link) + if sanity && h.chunks[reverse] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[reverse] = uint16(off<>= uint(16 - n) + if n <= huffmanChunkBits { + for off := reverse; off < len(h.chunks); off += 1 << uint(n) { + // We should never need to overwrite + // an existing chunk. Also, 0 is + // never a valid chunk, because the + // lower 4 "count" bits should be + // between 1 and 15. + if sanity && h.chunks[off] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[off] = chunk + } + } else { + j := reverse & (huffmanNumChunks - 1) + if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { + // Longer codes should have been + // associated with a link table above. + panic("impossible: not an indirect chunk") + } + value := h.chunks[j] >> huffmanValueShift + linktab := h.links[value] + reverse >>= huffmanChunkBits + for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { + if sanity && linktab[off] != 0 { + panic("impossible: overwriting existing chunk") + } + linktab[off] = chunk + } + } + } + + if sanity { + // Above we've sanity checked that we never overwrote + // an existing entry. Here we additionally check that + // we filled the tables completely. + for i, chunk := range h.chunks { + if chunk == 0 { + // As an exception, in the degenerate + // single-code case, we allow odd + // chunks to be missing. + if code == 1 && i%2 == 1 { + continue + } + panic("impossible: missing chunk") + } + } + for _, linktab := range h.links { + for _, chunk := range linktab { + if chunk == 0 { + panic("impossible: missing chunk") + } + } + } + } + + return true +} + +// The actual read interface needed by NewReader. +// If the passed in io.Reader does not also have ReadByte, +// the NewReader will introduce its own buffering. +type Reader interface { + io.Reader + io.ByteReader +} + +// Decompress state. +type decompressor struct { + // Input source. + r Reader + roffset int64 + + // Input bits, in top of b. + b uint32 + nb uint + + // Huffman decoders for literal/length, distance. + h1, h2 huffmanDecoder + + // Length arrays used to define Huffman codes. + bits *[maxNumLit + maxNumDist]int + codebits *[numCodes]int + + // Output history, buffer. + dict dictDecoder + + // Temporary buffer (avoids repeated allocation). + buf [4]byte + + // Next step in the decompression, + // and decompression state. + step func(*decompressor) + stepState int + final bool + err error + toRead []byte + hl, hd *huffmanDecoder + copyLen int + copyDist int +} + +func (f *decompressor) nextBlock() { + for f.nb < 1+2 { + if f.err = f.moreBits(); f.err != nil { + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + case 1: + // compressed, fixed Huffman tables + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlock() + case 2: + // compressed, dynamic Huffman tables + if f.err = f.readHuffman(); f.err != nil { + break + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlock() + default: + // 3 is reserved. + if debugDecode { + fmt.Println("reserved data block encountered") + } + f.err = CorruptInputError(f.roffset) + } +} + +func (f *decompressor) Read(b []byte) (int, error) { + for { + if len(f.toRead) > 0 { + n := copy(b, f.toRead) + f.toRead = f.toRead[n:] + if len(f.toRead) == 0 { + return n, f.err + } + return n, nil + } + if f.err != nil { + return 0, f.err + } + f.step(f) + if f.err != nil && len(f.toRead) == 0 { + f.toRead = f.dict.readFlush() // Flush what's left in case of error + } + } +} + +// Support the io.WriteTo interface for io.Copy and friends. +func (f *decompressor) WriteTo(w io.Writer) (int64, error) { + total := int64(0) + flushed := false + for { + if len(f.toRead) > 0 { + n, err := w.Write(f.toRead) + total += int64(n) + if err != nil { + f.err = err + return total, err + } + if n != len(f.toRead) { + return total, io.ErrShortWrite + } + f.toRead = f.toRead[:0] + } + if f.err != nil && flushed { + if f.err == io.EOF { + return total, nil + } + return total, f.err + } + if f.err == nil { + f.step(f) + } + if len(f.toRead) == 0 && f.err != nil && !flushed { + f.toRead = f.dict.readFlush() // Flush what's left in case of error + flushed = true + } + } +} + +func (f *decompressor) Close() error { + if f.err == io.EOF { + return nil + } + return f.err +} + +// RFC 1951 section 3.2.7. +// Compression with dynamic Huffman codes + +var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +func (f *decompressor) readHuffman() error { + // HLIT[5], HDIST[5], HCLEN[4]. + for f.nb < 5+5+4 { + if err := f.moreBits(); err != nil { + return err + } + } + nlit := int(f.b&0x1F) + 257 + if nlit > maxNumLit { + if debugDecode { + fmt.Println("nlit > maxNumLit", nlit) + } + return CorruptInputError(f.roffset) + } + f.b >>= 5 + ndist := int(f.b&0x1F) + 1 + if ndist > maxNumDist { + if debugDecode { + fmt.Println("ndist > maxNumDist", ndist) + } + return CorruptInputError(f.roffset) + } + f.b >>= 5 + nclen := int(f.b&0xF) + 4 + // numCodes is 19, so nclen is always valid. + f.b >>= 4 + f.nb -= 5 + 5 + 4 + + // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. + for i := 0; i < nclen; i++ { + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + f.codebits[codeOrder[i]] = int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + } + for i := nclen; i < len(codeOrder); i++ { + f.codebits[codeOrder[i]] = 0 + } + if !f.h1.init(f.codebits[0:]) { + if debugDecode { + fmt.Println("init codebits failed") + } + return CorruptInputError(f.roffset) + } + + // HLIT + 257 code lengths, HDIST + 1 code lengths, + // using the code length Huffman code. + for i, n := 0, nlit+ndist; i < n; { + x, err := f.huffSym(&f.h1) + if err != nil { + return err + } + if x < 16 { + // Actual length. + f.bits[i] = x + i++ + continue + } + // Repeat previous length or zero. + var rep int + var nb uint + var b int + switch x { + default: + return InternalError("unexpected length code") + case 16: + rep = 3 + nb = 2 + if i == 0 { + if debugDecode { + fmt.Println("i==0") + } + return CorruptInputError(f.roffset) + } + b = f.bits[i-1] + case 17: + rep = 3 + nb = 3 + b = 0 + case 18: + rep = 11 + nb = 7 + b = 0 + } + for f.nb < nb { + if err := f.moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits:", err) + } + return err + } + } + rep += int(f.b & uint32(1<>= nb + f.nb -= nb + if i+rep > n { + if debugDecode { + fmt.Println("i+rep > n", i, rep, n) + } + return CorruptInputError(f.roffset) + } + for j := 0; j < rep; j++ { + f.bits[i] = b + i++ + } + } + + if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { + if debugDecode { + fmt.Println("init2 failed") + } + return CorruptInputError(f.roffset) + } + + // As an optimization, we can initialize the min bits to read at a time + // for the HLIT tree to the length of the EOB marker since we know that + // every block must terminate with one. This preserves the property that + // we never read any extra bytes after the end of the DEFLATE stream. + if f.h1.min < f.bits[endBlockMarker] { + f.h1.min = f.bits[endBlockMarker] + } + + return nil +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBlock() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + v, err := f.huffSym(f.hl) + if err != nil { + f.err = err + return + } + var n uint // number of bits extra + var length int + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBlock + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = f.moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = f.moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = f.moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBlock // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Copy a single uncompressed data block from input to output. +func (f *decompressor) dataBlock() { + // Uncompressed. + // Discard current half-byte. + f.nb = 0 + f.b = 0 + + // Length then ones-complement of length. + nr, err := io.ReadFull(f.r, f.buf[0:4]) + f.roffset += int64(nr) + if err != nil { + f.err = noEOF(err) + return + } + n := int(f.buf[0]) | int(f.buf[1])<<8 + nn := int(f.buf[2]) | int(f.buf[3])<<8 + if uint16(nn) != uint16(^n) { + if debugDecode { + fmt.Println("uint16(nn) != uint16(^n)", nn, ^n) + } + f.err = CorruptInputError(f.roffset) + return + } + + if n == 0 { + f.toRead = f.dict.readFlush() + f.finishBlock() + return + } + + f.copyLen = n + f.copyData() +} + +// copyData copies f.copyLen bytes from the underlying reader into f.hist. +// It pauses for reads when f.hist is full. +func (f *decompressor) copyData() { + buf := f.dict.writeSlice() + if len(buf) > f.copyLen { + buf = buf[:f.copyLen] + } + + cnt, err := io.ReadFull(f.r, buf) + f.roffset += int64(cnt) + f.copyLen -= cnt + f.dict.writeMark(cnt) + if err != nil { + f.err = noEOF(err) + return + } + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).copyData + return + } + f.finishBlock() +} + +func (f *decompressor) finishBlock() { + if f.final { + if f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() + } + f.err = io.EOF + } + f.step = (*decompressor).nextBlock +} + +// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. +func noEOF(e error) error { + if e == io.EOF { + return io.ErrUnexpectedEOF + } + return e +} + +func (f *decompressor) moreBits() error { + c, err := f.r.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil +} + +// Read the next Huffman-encoded symbol from f according to h. +func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(h.min) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := f.r.ReadByte() + if err != nil { + f.b = b + f.nb = nb + return 0, noEOF(err) + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := h.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return 0, f.err + } + f.b = b >> (n & 31) + f.nb = nb - n + return int(chunk >> huffmanValueShift), nil + } + } +} + +func makeReader(r io.Reader) Reader { + if rr, ok := r.(Reader); ok { + return rr + } + return bufio.NewReader(r) +} + +func fixedHuffmanDecoderInit() { + fixedOnce.Do(func() { + // These come from the RFC section 3.2.6. + var bits [288]int + for i := 0; i < 144; i++ { + bits[i] = 8 + } + for i := 144; i < 256; i++ { + bits[i] = 9 + } + for i := 256; i < 280; i++ { + bits[i] = 7 + } + for i := 280; i < 288; i++ { + bits[i] = 8 + } + fixedHuffmanDecoder.init(bits[:]) + }) +} + +func (f *decompressor) Reset(r io.Reader, dict []byte) error { + *f = decompressor{ + r: makeReader(r), + bits: f.bits, + codebits: f.codebits, + h1: f.h1, + h2: f.h2, + dict: f.dict, + step: (*decompressor).nextBlock, + } + f.dict.init(maxMatchOffset, dict) + return nil +} + +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser +// when finished reading. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReader(r io.Reader) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.r = makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, nil) + return &f +} + +// NewReaderDict is like NewReader but initializes the reader +// with a preset dictionary. The returned Reader behaves as if +// the uncompressed data stream started with the given dictionary, +// which has already been read. NewReaderDict is typically used +// to read data compressed by NewWriterDict. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.r = makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, dict) + return &f +} diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go new file mode 100644 index 000000000..102fc74c7 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level1.go @@ -0,0 +1,179 @@ +package flate + +import "fmt" + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastEncL1 struct { + fastGen + table [tableSize]tableEntry +} + +// EncodeL1 uses a similar algorithm to level 1 +func (e *fastEncL1) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDecode && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3232(src, s) + + for { + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hash(cv) + candidate = e.table[nextHash] + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + + now := load6432(src, nextS) + e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + nextHash = hash(uint32(now)) + + offset := s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == candidate.val { + e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)} + break + } + + // Do one right away... + cv = uint32(now) + s = nextS + nextS++ + candidate = e.table[nextHash] + now >>= 8 + e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + + offset = s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == candidate.val { + e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)} + break + } + cv = uint32(now) + s = nextS + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset - e.cur + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + + // Save the match found + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + if s >= sLimit { + // Index first pair after match end. + if int(s+l+4) < len(src) { + cv := load3232(src, s) + e.table[hash(cv)] = tableEntry{offset: s + e.cur, val: cv} + } + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6432(src, s-2) + o := e.cur + s - 2 + prevHash := hash(uint32(x)) + e.table[prevHash] = tableEntry{offset: o, val: uint32(x)} + x >>= 16 + currHash := hash(uint32(x)) + candidate = e.table[currHash] + e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x)} + + offset := s - (candidate.offset - e.cur) + if offset > maxMatchOffset || uint32(x) != candidate.val { + cv = uint32(x >> 8) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go new file mode 100644 index 000000000..dc6b1d314 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level2.go @@ -0,0 +1,205 @@ +package flate + +import "fmt" + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastEncL2 struct { + fastGen + table [bTableSize]tableEntry +} + +// EncodeL2 uses a similar algorithm to level 1, but is capable +// of matching across blocks giving better compression at a small slowdown. +func (e *fastEncL2) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + if debugDecode && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3232(src, s) + for { + // When should we start skipping if we haven't found matches in a long while. + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hash4u(cv, bTableBits) + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + candidate = e.table[nextHash] + now := load6432(src, nextS) + e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + nextHash = hash4u(uint32(now), bTableBits) + + offset := s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == candidate.val { + e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)} + break + } + + // Do one right away... + cv = uint32(now) + s = nextS + nextS++ + candidate = e.table[nextHash] + now >>= 8 + e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + + offset = s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == candidate.val { + break + } + cv = uint32(now) + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset - e.cur + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index first pair after match end. + if int(s+l+4) < len(src) { + cv := load3232(src, s) + e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur, val: cv} + } + goto emitRemainder + } + + // Store every second hash in-between, but offset by 1. + for i := s - l + 2; i < s-5; i += 7 { + x := load6432(src, int32(i)) + nextHash := hash4u(uint32(x), bTableBits) + e.table[nextHash] = tableEntry{offset: e.cur + i, val: uint32(x)} + // Skip one + x >>= 16 + nextHash = hash4u(uint32(x), bTableBits) + e.table[nextHash] = tableEntry{offset: e.cur + i + 2, val: uint32(x)} + // Skip one + x >>= 16 + nextHash = hash4u(uint32(x), bTableBits) + e.table[nextHash] = tableEntry{offset: e.cur + i + 4, val: uint32(x)} + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 to s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6432(src, s-2) + o := e.cur + s - 2 + prevHash := hash4u(uint32(x), bTableBits) + prevHash2 := hash4u(uint32(x>>8), bTableBits) + e.table[prevHash] = tableEntry{offset: o, val: uint32(x)} + e.table[prevHash2] = tableEntry{offset: o + 1, val: uint32(x >> 8)} + currHash := hash4u(uint32(x>>16), bTableBits) + candidate = e.table[currHash] + e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x >> 16)} + + offset := s - (candidate.offset - e.cur) + if offset > maxMatchOffset || uint32(x>>16) != candidate.val { + cv = uint32(x >> 24) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go new file mode 100644 index 000000000..1a3ff9b6b --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level3.go @@ -0,0 +1,231 @@ +package flate + +import "fmt" + +// fastEncL3 +type fastEncL3 struct { + fastGen + table [tableSize]tableEntryPrev +} + +// Encode uses a similar algorithm to level 2, will check up to two candidates. +func (e *fastEncL3) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 8 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + if debugDecode && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + } + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + e.table[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // Skip if too small. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3232(src, s) + for { + const skipLog = 6 + nextS := s + var candidate tableEntry + for { + nextHash := hash(cv) + s = nextS + nextS = s + 1 + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + candidates := e.table[nextHash] + now := load3232(src, nextS) + e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}} + + // Check both candidates + candidate = candidates.Cur + offset := s - (candidate.offset - e.cur) + if cv == candidate.val { + if offset > maxMatchOffset { + cv = now + // Previous will also be invalid, we have nothing. + continue + } + o2 := s - (candidates.Prev.offset - e.cur) + if cv != candidates.Prev.val || o2 > maxMatchOffset { + break + } + // Both match and are valid, pick longest. + l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:]) + if l2 > l1 { + candidate = candidates.Prev + } + break + } else { + // We only check if value mismatches. + // Offset will always be invalid in other cases. + candidate = candidates.Prev + if cv == candidate.val { + offset := s - (candidate.offset - e.cur) + if offset <= maxMatchOffset { + break + } + } + } + cv = now + } + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + // + t := candidate.offset - e.cur + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + t += l + // Index first pair after match end. + if int(t+4) < len(src) && t > 0 { + cv := load3232(src, t) + nextHash := hash(cv) + e.table[nextHash] = tableEntryPrev{ + Prev: e.table[nextHash].Cur, + Cur: tableEntry{offset: e.cur + t, val: cv}, + } + } + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-3 to s. + x := load6432(src, s-3) + prevHash := hash(uint32(x)) + e.table[prevHash] = tableEntryPrev{ + Prev: e.table[prevHash].Cur, + Cur: tableEntry{offset: e.cur + s - 3, val: uint32(x)}, + } + x >>= 8 + prevHash = hash(uint32(x)) + + e.table[prevHash] = tableEntryPrev{ + Prev: e.table[prevHash].Cur, + Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)}, + } + x >>= 8 + prevHash = hash(uint32(x)) + + e.table[prevHash] = tableEntryPrev{ + Prev: e.table[prevHash].Cur, + Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)}, + } + x >>= 8 + currHash := hash(uint32(x)) + candidates := e.table[currHash] + cv = uint32(x) + e.table[currHash] = tableEntryPrev{ + Prev: candidates.Cur, + Cur: tableEntry{offset: s + e.cur, val: cv}, + } + + // Check both candidates + candidate = candidates.Cur + if cv == candidate.val { + offset := s - (candidate.offset - e.cur) + if offset <= maxMatchOffset { + continue + } + } else { + // We only check if value mismatches. + // Offset will always be invalid in other cases. + candidate = candidates.Prev + if cv == candidate.val { + offset := s - (candidate.offset - e.cur) + if offset <= maxMatchOffset { + continue + } + } + } + cv = uint32(x >> 8) + s++ + break + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go new file mode 100644 index 000000000..f3ecc9c4d --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level4.go @@ -0,0 +1,212 @@ +package flate + +import "fmt" + +type fastEncL4 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntry +} + +func (e *fastEncL4) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDecode && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.bTable[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var t int32 + for { + nextHashS := hash4x64(cv, tableBits) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.table[nextHashS] = entry + e.bTable[nextHashL] = entry + + t = lCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == lCandidate.val { + // We got a long match. Use that. + break + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { + // Found a 4 match... + lCandidate = e.bTable[hash7(next, tableBits)] + + // If the next long is a candidate, check if we should use that instead... + lOff := nextS - (lCandidate.offset - e.cur) + if lOff < maxMatchOffset && lCandidate.val == uint32(next) { + l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:]) + if l2 > l1 { + s = nextS + t = lCandidate.offset - e.cur + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Extend the 4-byte match as long as possible. + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + if false { + if t >= s { + panic("s-t") + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index first pair after match end. + if int(s+8) < len(src) { + cv := load6432(src, s) + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)} + } + goto emitRemainder + } + + // Store every 3rd hash in-between + if true { + i := nextS + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur, val: uint32(cv)} + t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1} + e.bTable[hash7(cv, tableBits)] = t + e.bTable[hash7(cv>>8, tableBits)] = t2 + e.table[hash4u(t2.val, tableBits)] = t2 + + i += 3 + for ; i < s-1; i += 3 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur, val: uint32(cv)} + t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1} + e.bTable[hash7(cv, tableBits)] = t + e.bTable[hash7(cv>>8, tableBits)] = t2 + e.table[hash4u(t2.val, tableBits)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hash4x64(x, tableBits) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)} + e.bTable[prevHashL] = tableEntry{offset: o, val: uint32(x)} + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go new file mode 100644 index 000000000..4e3916825 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level5.go @@ -0,0 +1,279 @@ +package flate + +import "fmt" + +type fastEncL5 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL5) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDecode && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hash4x64(cv, tableBits) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + nextHashS = hash4x64(next, tableBits) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == lCandidate.Cur.val { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + l = e.matchlen(s+4, t+4, src) + 4 + ml1 := e.matchlen(s+4, t2+4, src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { + // Found a 4 match... + l = e.matchlen(s+4, t+4, src) + 4 + lCandidate = e.bTable[nextHashL] + // Store the next match + + e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + + // If the next long is a candidate, use that... + t2 := lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if lCandidate.Cur.val == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Extend the 4-byte match as long as possible. + if l == 0 { + l = e.matchlenLong(s+4, t+4, src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(s+l, t+l, src) + } + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + if false { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + goto emitRemainder + } + + // Store every 3rd hash in-between. + if true { + const hashEvery = 3 + i := s - l + 1 + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur, val: uint32(cv)} + e.table[hash4x64(cv, tableBits)] = t + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // Do an long at i+1 + cv >>= 8 + t = tableEntry{offset: t.offset + 1, val: uint32(cv)} + eLong = &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // We only have enough bits for a short entry at i+2 + cv >>= 8 + t = tableEntry{offset: t.offset + 1, val: uint32(cv)} + e.table[hash4x64(cv, tableBits)] = t + + // Skip one - otherwise we risk hitting 's' + i += 4 + for ; i < s-1; i += hashEvery { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur, val: uint32(cv)} + t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + e.table[hash4u(t2.val, tableBits)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hash4x64(x, tableBits) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)} + eLong := &e.bTable[prevHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: o, val: uint32(x)}, eLong.Cur + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go new file mode 100644 index 000000000..00a311977 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level6.go @@ -0,0 +1,282 @@ +package flate + +import "fmt" + +type fastEncL6 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL6) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDecode && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + // Repeat MUST be > 1 and within range + repeat := int32(1) + for { + const skipLog = 7 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hash4x64(cv, tableBits) + nextHashL := hash7(cv, tableBits) + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + // Calculate hashes of 'next' + nextHashS = hash4x64(next, tableBits) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == lCandidate.Cur.val { + // Long candidate matches at least 4 bytes. + + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + + // Check the previous long candidate as well. + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + l = e.matchlen(s+4, t+4, src) + 4 + ml1 := e.matchlen(s+4, t2+4, src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + // Current value did not match, but check if previous long value does. + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { + // Found a 4 match... + l = e.matchlen(s+4, t+4, src) + 4 + + // Look up next long candidate (at nextS) + lCandidate = e.bTable[nextHashL] + + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + + // Check repeat at s + repOff + const repOff = 1 + t2 := s - repeat + repOff + if load3232(src, t2) == uint32(cv>>(8*repOff)) { + ml := e.matchlen(s+4+repOff, t2+4, src) + 4 + if ml > l { + t = t2 + l = ml + s += repOff + // Not worth checking more. + break + } + } + + // If the next long is a candidate, use that... + t2 = lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if lCandidate.Cur.val == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + // This is ok, but check previous as well. + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Extend the 4-byte match as long as possible. + if l == 0 { + l = e.matchlenLong(s+4, t+4, src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(s+l, t+l, src) + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + if false { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + repeat = s - t + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index after match end. + for i := nextS + 1; i < int32(len(src))-8; i += 2 { + cv := load6432(src, i) + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur, val: uint32(cv)} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur, val: uint32(cv)}, eLong.Cur + } + goto emitRemainder + } + + // Store every long hash in-between and every second short. + if true { + for i := nextS + 1; i < s-1; i += 2 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur, val: uint32(cv)} + t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong2 := &e.bTable[hash7(cv>>8, tableBits)] + e.table[hash4x64(cv, tableBits)] = t + eLong.Cur, eLong.Prev = t, eLong.Cur + eLong2.Cur, eLong2.Prev = t2, eLong2.Cur + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + cv = load6432(src, s) + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go new file mode 100644 index 000000000..53e899124 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/stateless.go @@ -0,0 +1,297 @@ +package flate + +import ( + "io" + "math" + "sync" +) + +const ( + maxStatelessBlock = math.MaxInt16 + // dictionary will be taken from maxStatelessBlock, so limit it. + maxStatelessDict = 8 << 10 + + slTableBits = 13 + slTableSize = 1 << slTableBits + slTableShift = 32 - slTableBits +) + +type statelessWriter struct { + dst io.Writer + closed bool +} + +func (s *statelessWriter) Close() error { + if s.closed { + return nil + } + s.closed = true + // Emit EOF block + return StatelessDeflate(s.dst, nil, true, nil) +} + +func (s *statelessWriter) Write(p []byte) (n int, err error) { + err = StatelessDeflate(s.dst, p, false, nil) + if err != nil { + return 0, err + } + return len(p), nil +} + +func (s *statelessWriter) Reset(w io.Writer) { + s.dst = w + s.closed = false +} + +// NewStatelessWriter will do compression but without maintaining any state +// between Write calls. +// There will be no memory kept between Write calls, +// but compression and speed will be suboptimal. +// Because of this, the size of actual Write calls will affect output size. +func NewStatelessWriter(dst io.Writer) io.WriteCloser { + return &statelessWriter{dst: dst} +} + +// bitWriterPool contains bit writers that can be reused. +var bitWriterPool = sync.Pool{ + New: func() interface{} { + return newHuffmanBitWriter(nil) + }, +} + +// StatelessDeflate allows to compress directly to a Writer without retaining state. +// When returning everything will be flushed. +// Up to 8KB of an optional dictionary can be given which is presumed to presumed to precede the block. +// Longer dictionaries will be truncated and will still produce valid output. +// Sending nil dictionary is perfectly fine. +func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { + var dst tokens + bw := bitWriterPool.Get().(*huffmanBitWriter) + bw.reset(out) + defer func() { + // don't keep a reference to our output + bw.reset(nil) + bitWriterPool.Put(bw) + }() + if eof && len(in) == 0 { + // Just write an EOF block. + // Could be faster... + bw.writeStoredHeader(0, true) + bw.flush() + return bw.err + } + + // Truncate dict + if len(dict) > maxStatelessDict { + dict = dict[len(dict)-maxStatelessDict:] + } + + for len(in) > 0 { + todo := in + if len(todo) > maxStatelessBlock-len(dict) { + todo = todo[:maxStatelessBlock-len(dict)] + } + in = in[len(todo):] + uncompressed := todo + if len(dict) > 0 { + // combine dict and source + bufLen := len(todo) + len(dict) + combined := make([]byte, bufLen) + copy(combined, dict) + copy(combined[len(dict):], todo) + todo = combined + } + // Compress + statelessEnc(&dst, todo, int16(len(dict))) + isEof := eof && len(in) == 0 + + if dst.n == 0 { + bw.writeStoredHeader(len(uncompressed), isEof) + if bw.err != nil { + return bw.err + } + bw.writeBytes(uncompressed) + } else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 { + // If we removed less than 1/16th, huffman compress the block. + bw.writeBlockHuff(isEof, uncompressed, len(in) == 0) + } else { + bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0) + } + if len(in) > 0 { + // Retain a dict if we have more + dict = todo[len(todo)-maxStatelessDict:] + dst.Reset() + } + if bw.err != nil { + return bw.err + } + } + if !eof { + // Align, only a stored block can do that. + bw.writeStoredHeader(0, false) + } + bw.flush() + return bw.err +} + +func hashSL(u uint32) uint32 { + return (u * 0x1e35a7bd) >> slTableShift +} + +func load3216(b []byte, i int16) uint32 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:4] + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load6416(b []byte, i int16) uint64 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:8] + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +func statelessEnc(dst *tokens, src []byte, startAt int16) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + type tableEntry struct { + offset int16 + } + + var table [slTableSize]tableEntry + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src)-int(startAt) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = 0 + return + } + // Index until startAt + if startAt > 0 { + cv := load3232(src, 0) + for i := int16(0); i < startAt; i++ { + table[hashSL(cv)] = tableEntry{offset: i} + cv = (cv >> 8) | (uint32(src[i+4]) << 24) + } + } + + s := startAt + 1 + nextEmit := startAt + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int16(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3216(src, s) + + for { + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hashSL(cv) + candidate = table[nextHash] + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit || nextS <= 0 { + goto emitRemainder + } + + now := load6416(src, nextS) + table[nextHash] = tableEntry{offset: s} + nextHash = hashSL(uint32(now)) + + if cv == load3216(src, candidate.offset) { + table[nextHash] = tableEntry{offset: nextS} + break + } + + // Do one right away... + cv = uint32(now) + s = nextS + nextS++ + candidate = table[nextHash] + now >>= 8 + table[nextHash] = tableEntry{offset: s} + + if cv == load3216(src, candidate.offset) { + table[nextHash] = tableEntry{offset: nextS} + break + } + cv = uint32(now) + s = nextS + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset + l := int16(matchLen(src[s+4:], src[t+4:]) + 4) + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + + // Save the match found + dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + if s >= sLimit { + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6416(src, s-2) + o := s - 2 + prevHash := hashSL(uint32(x)) + table[prevHash] = tableEntry{offset: o} + x >>= 16 + currHash := hashSL(uint32(x)) + candidate = table[currHash] + table[currHash] = tableEntry{offset: o + 2} + + if uint32(x) != load3216(src, candidate.offset) { + cv = uint32(x >> 8) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go new file mode 100644 index 000000000..099c0ddbc --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/token.go @@ -0,0 +1,375 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "math" +) + +const ( + // 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused + // 8 bits: xlength = length - MIN_MATCH_LENGTH + // 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal + lengthShift = 22 + offsetMask = 1<maxnumlit + offHist [32]uint16 // offset codes + litHist [256]uint16 // codes 0->255 + n uint16 // Must be able to contain maxStoreBlockSize + tokens [maxStoreBlockSize + 1]token +} + +func (t *tokens) Reset() { + if t.n == 0 { + return + } + t.n = 0 + t.nLits = 0 + for i := range t.litHist[:] { + t.litHist[i] = 0 + } + for i := range t.extraHist[:] { + t.extraHist[i] = 0 + } + for i := range t.offHist[:] { + t.offHist[i] = 0 + } +} + +func (t *tokens) Fill() { + if t.n == 0 { + return + } + for i, v := range t.litHist[:] { + if v == 0 { + t.litHist[i] = 1 + t.nLits++ + } + } + for i, v := range t.extraHist[:literalCount-256] { + if v == 0 { + t.nLits++ + t.extraHist[i] = 1 + } + } + for i, v := range t.offHist[:offsetCodeCount] { + if v == 0 { + t.offHist[i] = 1 + } + } +} + +func indexTokens(in []token) tokens { + var t tokens + t.indexTokens(in) + return t +} + +func (t *tokens) indexTokens(in []token) { + t.Reset() + for _, tok := range in { + if tok < matchType { + t.AddLiteral(tok.literal()) + continue + } + t.AddMatch(uint32(tok.length()), tok.offset()) + } +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. +func emitLiteral(dst *tokens, lit []byte) { + ol := int(dst.n) + for i, v := range lit { + dst.tokens[(i+ol)&maxStoreBlockSize] = token(v) + dst.litHist[v]++ + } + dst.n += uint16(len(lit)) + dst.nLits += len(lit) +} + +func (t *tokens) AddLiteral(lit byte) { + t.tokens[t.n] = token(lit) + t.litHist[lit]++ + t.n++ + t.nLits++ +} + +// from https://stackoverflow.com/a/28730362 +func mFastLog2(val float32) float32 { + ux := int32(math.Float32bits(val)) + log2 := (float32)(((ux >> 23) & 255) - 128) + ux &= -0x7f800001 + ux += 127 << 23 + uval := math.Float32frombits(uint32(ux)) + log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759 + return log2 +} + +// EstimatedBits will return an minimum size estimated by an *optimal* +// compression of the block. +// The size of the block +func (t *tokens) EstimatedBits() int { + shannon := float32(0) + bits := int(0) + nMatches := 0 + if t.nLits > 0 { + invTotal := 1.0 / float32(t.nLits) + for _, v := range t.litHist[:] { + if v > 0 { + n := float32(v) + shannon += -mFastLog2(n*invTotal) * n + } + } + // Just add 15 for EOB + shannon += 15 + for i, v := range t.extraHist[1 : literalCount-256] { + if v > 0 { + n := float32(v) + shannon += -mFastLog2(n*invTotal) * n + bits += int(lengthExtraBits[i&31]) * int(v) + nMatches += int(v) + } + } + } + if nMatches > 0 { + invTotal := 1.0 / float32(nMatches) + for i, v := range t.offHist[:offsetCodeCount] { + if v > 0 { + n := float32(v) + shannon += -mFastLog2(n*invTotal) * n + bits += int(offsetExtraBits[i&31]) * int(v) + } + } + } + return int(shannon) + bits +} + +// AddMatch adds a match to the tokens. +// This function is very sensitive to inlining and right on the border. +func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { + if debugDecode { + if xlength >= maxMatchLength+baseMatchLength { + panic(fmt.Errorf("invalid length: %v", xlength)) + } + if xoffset >= maxMatchOffset+baseMatchOffset { + panic(fmt.Errorf("invalid offset: %v", xoffset)) + } + } + t.nLits++ + lengthCode := lengthCodes1[uint8(xlength)] & 31 + t.tokens[t.n] = token(matchType | xlength<= maxMatchOffset+baseMatchOffset { + panic(fmt.Errorf("invalid offset: %v", xoffset)) + } + } + oc := offsetCode(xoffset) & 31 + for xlength > 0 { + xl := xlength + if xl > 258 { + // We need to have at least baseMatchLength left over for next loop. + xl = 258 - baseMatchLength + } + xlength -= xl + xl -= 3 + t.nLits++ + lengthCode := lengthCodes1[uint8(xl)] & 31 + t.tokens[t.n] = token(matchType | uint32(xl)<> lengthShift) } + +// The code is never more than 8 bits, but is returned as uint32 for convenience. +func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) } + +// Returns the offset code corresponding to a specific offset +func offsetCode(off uint32) uint32 { + if false { + if off < uint32(len(offsetCodes)) { + return offsetCodes[off&255] + } else if off>>7 < uint32(len(offsetCodes)) { + return offsetCodes[(off>>7)&255] + 14 + } else { + return offsetCodes[(off>>14)&255] + 28 + } + } + if off < uint32(len(offsetCodes)) { + return offsetCodes[uint8(off)] + } + return offsetCodes14[uint8(off>>7)] +} diff --git a/vendor/github.com/klauspost/compress/gzip/gunzip.go b/vendor/github.com/klauspost/compress/gzip/gunzip.go new file mode 100644 index 000000000..568b5d4fb --- /dev/null +++ b/vendor/github.com/klauspost/compress/gzip/gunzip.go @@ -0,0 +1,344 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package gzip implements reading and writing of gzip format compressed files, +// as specified in RFC 1952. +package gzip + +import ( + "bufio" + "encoding/binary" + "errors" + "hash/crc32" + "io" + "time" + + "github.com/klauspost/compress/flate" +) + +const ( + gzipID1 = 0x1f + gzipID2 = 0x8b + gzipDeflate = 8 + flagText = 1 << 0 + flagHdrCrc = 1 << 1 + flagExtra = 1 << 2 + flagName = 1 << 3 + flagComment = 1 << 4 +) + +var ( + // ErrChecksum is returned when reading GZIP data that has an invalid checksum. + ErrChecksum = errors.New("gzip: invalid checksum") + // ErrHeader is returned when reading GZIP data that has an invalid header. + ErrHeader = errors.New("gzip: invalid header") +) + +var le = binary.LittleEndian + +// noEOF converts io.EOF to io.ErrUnexpectedEOF. +func noEOF(err error) error { + if err == io.EOF { + return io.ErrUnexpectedEOF + } + return err +} + +// The gzip file stores a header giving metadata about the compressed file. +// That header is exposed as the fields of the Writer and Reader structs. +// +// Strings must be UTF-8 encoded and may only contain Unicode code points +// U+0001 through U+00FF, due to limitations of the GZIP file format. +type Header struct { + Comment string // comment + Extra []byte // "extra data" + ModTime time.Time // modification time + Name string // file name + OS byte // operating system type +} + +// A Reader is an io.Reader that can be read to retrieve +// uncompressed data from a gzip-format compressed file. +// +// In general, a gzip file can be a concatenation of gzip files, +// each with its own header. Reads from the Reader +// return the concatenation of the uncompressed data of each. +// Only the first header is recorded in the Reader fields. +// +// Gzip files store a length and checksum of the uncompressed data. +// The Reader will return a ErrChecksum when Read +// reaches the end of the uncompressed data if it does not +// have the expected length or checksum. Clients should treat data +// returned by Read as tentative until they receive the io.EOF +// marking the end of the data. +type Reader struct { + Header // valid after NewReader or Reader.Reset + r flate.Reader + decompressor io.ReadCloser + digest uint32 // CRC-32, IEEE polynomial (section 8) + size uint32 // Uncompressed size (section 2.3.1) + buf [512]byte + err error + multistream bool +} + +// NewReader creates a new Reader reading the given reader. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// +// It is the caller's responsibility to call Close on the Reader when done. +// +// The Reader.Header fields will be valid in the Reader returned. +func NewReader(r io.Reader) (*Reader, error) { + z := new(Reader) + if err := z.Reset(r); err != nil { + return nil, err + } + return z, nil +} + +// Reset discards the Reader z's state and makes it equivalent to the +// result of its original state from NewReader, but reading from r instead. +// This permits reusing a Reader rather than allocating a new one. +func (z *Reader) Reset(r io.Reader) error { + *z = Reader{ + decompressor: z.decompressor, + multistream: true, + } + if rr, ok := r.(flate.Reader); ok { + z.r = rr + } else { + z.r = bufio.NewReader(r) + } + z.Header, z.err = z.readHeader() + return z.err +} + +// Multistream controls whether the reader supports multistream files. +// +// If enabled (the default), the Reader expects the input to be a sequence +// of individually gzipped data streams, each with its own header and +// trailer, ending at EOF. The effect is that the concatenation of a sequence +// of gzipped files is treated as equivalent to the gzip of the concatenation +// of the sequence. This is standard behavior for gzip readers. +// +// Calling Multistream(false) disables this behavior; disabling the behavior +// can be useful when reading file formats that distinguish individual gzip +// data streams or mix gzip data streams with other data streams. +// In this mode, when the Reader reaches the end of the data stream, +// Read returns io.EOF. If the underlying reader implements io.ByteReader, +// it will be left positioned just after the gzip stream. +// To start the next stream, call z.Reset(r) followed by z.Multistream(false). +// If there is no next stream, z.Reset(r) will return io.EOF. +func (z *Reader) Multistream(ok bool) { + z.multistream = ok +} + +// readString reads a NUL-terminated string from z.r. +// It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and +// will output a string encoded using UTF-8. +// This method always updates z.digest with the data read. +func (z *Reader) readString() (string, error) { + var err error + needConv := false + for i := 0; ; i++ { + if i >= len(z.buf) { + return "", ErrHeader + } + z.buf[i], err = z.r.ReadByte() + if err != nil { + return "", err + } + if z.buf[i] > 0x7f { + needConv = true + } + if z.buf[i] == 0 { + // Digest covers the NUL terminator. + z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:i+1]) + + // Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1). + if needConv { + s := make([]rune, 0, i) + for _, v := range z.buf[:i] { + s = append(s, rune(v)) + } + return string(s), nil + } + return string(z.buf[:i]), nil + } + } +} + +// readHeader reads the GZIP header according to section 2.3.1. +// This method does not set z.err. +func (z *Reader) readHeader() (hdr Header, err error) { + if _, err = io.ReadFull(z.r, z.buf[:10]); err != nil { + // RFC 1952, section 2.2, says the following: + // A gzip file consists of a series of "members" (compressed data sets). + // + // Other than this, the specification does not clarify whether a + // "series" is defined as "one or more" or "zero or more". To err on the + // side of caution, Go interprets this to mean "zero or more". + // Thus, it is okay to return io.EOF here. + return hdr, err + } + if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate { + return hdr, ErrHeader + } + flg := z.buf[3] + hdr.ModTime = time.Unix(int64(le.Uint32(z.buf[4:8])), 0) + // z.buf[8] is XFL and is currently ignored. + hdr.OS = z.buf[9] + z.digest = crc32.ChecksumIEEE(z.buf[:10]) + + if flg&flagExtra != 0 { + if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil { + return hdr, noEOF(err) + } + z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:2]) + data := make([]byte, le.Uint16(z.buf[:2])) + if _, err = io.ReadFull(z.r, data); err != nil { + return hdr, noEOF(err) + } + z.digest = crc32.Update(z.digest, crc32.IEEETable, data) + hdr.Extra = data + } + + var s string + if flg&flagName != 0 { + if s, err = z.readString(); err != nil { + return hdr, err + } + hdr.Name = s + } + + if flg&flagComment != 0 { + if s, err = z.readString(); err != nil { + return hdr, err + } + hdr.Comment = s + } + + if flg&flagHdrCrc != 0 { + if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil { + return hdr, noEOF(err) + } + digest := le.Uint16(z.buf[:2]) + if digest != uint16(z.digest) { + return hdr, ErrHeader + } + } + + z.digest = 0 + if z.decompressor == nil { + z.decompressor = flate.NewReader(z.r) + } else { + z.decompressor.(flate.Resetter).Reset(z.r, nil) + } + return hdr, nil +} + +// Read implements io.Reader, reading uncompressed bytes from its underlying Reader. +func (z *Reader) Read(p []byte) (n int, err error) { + if z.err != nil { + return 0, z.err + } + + n, z.err = z.decompressor.Read(p) + z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n]) + z.size += uint32(n) + if z.err != io.EOF { + // In the normal case we return here. + return n, z.err + } + + // Finished file; check checksum and size. + if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil { + z.err = noEOF(err) + return n, z.err + } + digest := le.Uint32(z.buf[:4]) + size := le.Uint32(z.buf[4:8]) + if digest != z.digest || size != z.size { + z.err = ErrChecksum + return n, z.err + } + z.digest, z.size = 0, 0 + + // File is ok; check if there is another. + if !z.multistream { + return n, io.EOF + } + z.err = nil // Remove io.EOF + + if _, z.err = z.readHeader(); z.err != nil { + return n, z.err + } + + // Read from next file, if necessary. + if n > 0 { + return n, nil + } + return z.Read(p) +} + +// Support the io.WriteTo interface for io.Copy and friends. +func (z *Reader) WriteTo(w io.Writer) (int64, error) { + total := int64(0) + crcWriter := crc32.NewIEEE() + for { + if z.err != nil { + if z.err == io.EOF { + return total, nil + } + return total, z.err + } + + // We write both to output and digest. + mw := io.MultiWriter(w, crcWriter) + n, err := z.decompressor.(io.WriterTo).WriteTo(mw) + total += n + z.size += uint32(n) + if err != nil { + z.err = err + return total, z.err + } + + // Finished file; check checksum + size. + if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + z.err = err + return total, err + } + z.digest = crcWriter.Sum32() + digest := le.Uint32(z.buf[:4]) + size := le.Uint32(z.buf[4:8]) + if digest != z.digest || size != z.size { + z.err = ErrChecksum + return total, z.err + } + z.digest, z.size = 0, 0 + + // File is ok; check if there is another. + if !z.multistream { + return total, nil + } + crcWriter.Reset() + z.err = nil // Remove io.EOF + + if _, z.err = z.readHeader(); z.err != nil { + if z.err == io.EOF { + return total, nil + } + return total, z.err + } + } +} + +// Close closes the Reader. It does not close the underlying io.Reader. +// In order for the GZIP checksum to be verified, the reader must be +// fully consumed until the io.EOF. +func (z *Reader) Close() error { return z.decompressor.Close() } diff --git a/vendor/github.com/klauspost/compress/gzip/gzip.go b/vendor/github.com/klauspost/compress/gzip/gzip.go new file mode 100644 index 000000000..6794cf48f --- /dev/null +++ b/vendor/github.com/klauspost/compress/gzip/gzip.go @@ -0,0 +1,269 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gzip + +import ( + "errors" + "fmt" + "hash/crc32" + "io" + + "github.com/klauspost/compress/flate" +) + +// These constants are copied from the flate package, so that code that imports +// "compress/gzip" does not also have to import "compress/flate". +const ( + NoCompression = flate.NoCompression + BestSpeed = flate.BestSpeed + BestCompression = flate.BestCompression + DefaultCompression = flate.DefaultCompression + ConstantCompression = flate.ConstantCompression + HuffmanOnly = flate.HuffmanOnly + + // StatelessCompression will do compression but without maintaining any state + // between Write calls. + // There will be no memory kept between Write calls, + // but compression and speed will be suboptimal. + // Because of this, the size of actual Write calls will affect output size. + StatelessCompression = -3 +) + +// A Writer is an io.WriteCloser. +// Writes to a Writer are compressed and written to w. +type Writer struct { + Header // written at first call to Write, Flush, or Close + w io.Writer + level int + wroteHeader bool + compressor *flate.Writer + digest uint32 // CRC-32, IEEE polynomial (section 8) + size uint32 // Uncompressed size (section 2.3.1) + closed bool + buf [10]byte + err error +} + +// NewWriter returns a new Writer. +// Writes to the returned writer are compressed and written to w. +// +// It is the caller's responsibility to call Close on the WriteCloser when done. +// Writes may be buffered and not flushed until Close. +// +// Callers that wish to set the fields in Writer.Header must do so before +// the first call to Write, Flush, or Close. +func NewWriter(w io.Writer) *Writer { + z, _ := NewWriterLevel(w, DefaultCompression) + return z +} + +// NewWriterLevel is like NewWriter but specifies the compression level instead +// of assuming DefaultCompression. +// +// The compression level can be DefaultCompression, NoCompression, or any +// integer value between BestSpeed and BestCompression inclusive. The error +// returned will be nil if the level is valid. +func NewWriterLevel(w io.Writer, level int) (*Writer, error) { + if level < StatelessCompression || level > BestCompression { + return nil, fmt.Errorf("gzip: invalid compression level: %d", level) + } + z := new(Writer) + z.init(w, level) + return z, nil +} + +func (z *Writer) init(w io.Writer, level int) { + compressor := z.compressor + if level != StatelessCompression { + if compressor != nil { + compressor.Reset(w) + } + } + + *z = Writer{ + Header: Header{ + OS: 255, // unknown + }, + w: w, + level: level, + compressor: compressor, + } +} + +// Reset discards the Writer z's state and makes it equivalent to the +// result of its original state from NewWriter or NewWriterLevel, but +// writing to w instead. This permits reusing a Writer rather than +// allocating a new one. +func (z *Writer) Reset(w io.Writer) { + z.init(w, z.level) +} + +// writeBytes writes a length-prefixed byte slice to z.w. +func (z *Writer) writeBytes(b []byte) error { + if len(b) > 0xffff { + return errors.New("gzip.Write: Extra data is too large") + } + le.PutUint16(z.buf[:2], uint16(len(b))) + _, err := z.w.Write(z.buf[:2]) + if err != nil { + return err + } + _, err = z.w.Write(b) + return err +} + +// writeString writes a UTF-8 string s in GZIP's format to z.w. +// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). +func (z *Writer) writeString(s string) (err error) { + // GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII. + needconv := false + for _, v := range s { + if v == 0 || v > 0xff { + return errors.New("gzip.Write: non-Latin-1 header string") + } + if v > 0x7f { + needconv = true + } + } + if needconv { + b := make([]byte, 0, len(s)) + for _, v := range s { + b = append(b, byte(v)) + } + _, err = z.w.Write(b) + } else { + _, err = io.WriteString(z.w, s) + } + if err != nil { + return err + } + // GZIP strings are NUL-terminated. + z.buf[0] = 0 + _, err = z.w.Write(z.buf[:1]) + return err +} + +// Write writes a compressed form of p to the underlying io.Writer. The +// compressed bytes are not necessarily flushed until the Writer is closed. +func (z *Writer) Write(p []byte) (int, error) { + if z.err != nil { + return 0, z.err + } + var n int + // Write the GZIP header lazily. + if !z.wroteHeader { + z.wroteHeader = true + z.buf[0] = gzipID1 + z.buf[1] = gzipID2 + z.buf[2] = gzipDeflate + z.buf[3] = 0 + if z.Extra != nil { + z.buf[3] |= 0x04 + } + if z.Name != "" { + z.buf[3] |= 0x08 + } + if z.Comment != "" { + z.buf[3] |= 0x10 + } + le.PutUint32(z.buf[4:8], uint32(z.ModTime.Unix())) + if z.level == BestCompression { + z.buf[8] = 2 + } else if z.level == BestSpeed { + z.buf[8] = 4 + } else { + z.buf[8] = 0 + } + z.buf[9] = z.OS + n, z.err = z.w.Write(z.buf[:10]) + if z.err != nil { + return n, z.err + } + if z.Extra != nil { + z.err = z.writeBytes(z.Extra) + if z.err != nil { + return n, z.err + } + } + if z.Name != "" { + z.err = z.writeString(z.Name) + if z.err != nil { + return n, z.err + } + } + if z.Comment != "" { + z.err = z.writeString(z.Comment) + if z.err != nil { + return n, z.err + } + } + + if z.compressor == nil && z.level != StatelessCompression { + z.compressor, _ = flate.NewWriter(z.w, z.level) + } + } + z.size += uint32(len(p)) + z.digest = crc32.Update(z.digest, crc32.IEEETable, p) + if z.level == StatelessCompression { + return len(p), flate.StatelessDeflate(z.w, p, false, nil) + } + n, z.err = z.compressor.Write(p) + return n, z.err +} + +// Flush flushes any pending compressed data to the underlying writer. +// +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. Flush does +// not return until the data has been written. If the underlying +// writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (z *Writer) Flush() error { + if z.err != nil { + return z.err + } + if z.closed || z.level == StatelessCompression { + return nil + } + if !z.wroteHeader { + z.Write(nil) + if z.err != nil { + return z.err + } + } + z.err = z.compressor.Flush() + return z.err +} + +// Close closes the Writer, flushing any unwritten data to the underlying +// io.Writer, but does not close the underlying io.Writer. +func (z *Writer) Close() error { + if z.err != nil { + return z.err + } + if z.closed { + return nil + } + z.closed = true + if !z.wroteHeader { + z.Write(nil) + if z.err != nil { + return z.err + } + } + if z.level == StatelessCompression { + z.err = flate.StatelessDeflate(z.w, nil, true, nil) + } else { + z.err = z.compressor.Close() + } + if z.err != nil { + return z.err + } + le.PutUint32(z.buf[:4], z.digest) + le.PutUint32(z.buf[4:8], z.size) + _, z.err = z.w.Write(z.buf[:8]) + return z.err +} diff --git a/vendor/github.com/pierrec/lz4/.gitignore b/vendor/github.com/pierrec/lz4/.gitignore new file mode 100644 index 000000000..5e9873504 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/.gitignore @@ -0,0 +1,34 @@ +# Created by https://www.gitignore.io/api/macos + +### macOS ### +*.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# End of https://www.gitignore.io/api/macos + +cmd/*/*exe +.idea \ No newline at end of file diff --git a/vendor/github.com/pierrec/lz4/.travis.yml b/vendor/github.com/pierrec/lz4/.travis.yml new file mode 100644 index 000000000..fd6c6db71 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/.travis.yml @@ -0,0 +1,24 @@ +language: go + +env: + - GO111MODULE=off + +go: + - 1.9.x + - 1.10.x + - 1.11.x + - 1.12.x + - master + +matrix: + fast_finish: true + allow_failures: + - go: master + +sudo: false + +script: + - go test -v -cpu=2 + - go test -v -cpu=2 -race + - go test -v -cpu=2 -tags noasm + - go test -v -cpu=2 -race -tags noasm diff --git a/vendor/github.com/pierrec/lz4/LICENSE b/vendor/github.com/pierrec/lz4/LICENSE new file mode 100644 index 000000000..bd899d835 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/LICENSE @@ -0,0 +1,28 @@ +Copyright (c) 2015, Pierre Curto +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of xxHash nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/vendor/github.com/pierrec/lz4/README.md b/vendor/github.com/pierrec/lz4/README.md new file mode 100644 index 000000000..4ee388e81 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/README.md @@ -0,0 +1,90 @@ +# lz4 : LZ4 compression in pure Go + +[![GoDoc](https://godoc.org/github.com/pierrec/lz4?status.svg)](https://godoc.org/github.com/pierrec/lz4) +[![Build Status](https://travis-ci.org/pierrec/lz4.svg?branch=master)](https://travis-ci.org/pierrec/lz4) +[![Go Report Card](https://goreportcard.com/badge/github.com/pierrec/lz4)](https://goreportcard.com/report/github.com/pierrec/lz4) +[![GitHub tag (latest SemVer)](https://img.shields.io/github/tag/pierrec/lz4.svg?style=social)](https://github.com/pierrec/lz4/tags) + +## Overview + +This package provides a streaming interface to [LZ4 data streams](http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html) as well as low level compress and uncompress functions for LZ4 data blocks. +The implementation is based on the reference C [one](https://github.com/lz4/lz4). + +## Install + +Assuming you have the go toolchain installed: + +``` +go get github.com/pierrec/lz4 +``` + +There is a command line interface tool to compress and decompress LZ4 files. + +``` +go install github.com/pierrec/lz4/cmd/lz4c +``` + +Usage + +``` +Usage of lz4c: + -version + print the program version + +Subcommands: +Compress the given files or from stdin to stdout. +compress [arguments] [ ...] + -bc + enable block checksum + -l int + compression level (0=fastest) + -sc + disable stream checksum + -size string + block max size [64K,256K,1M,4M] (default "4M") + +Uncompress the given files or from stdin to stdout. +uncompress [arguments] [ ...] + +``` + + +## Example + +``` +// Compress and uncompress an input string. +s := "hello world" +r := strings.NewReader(s) + +// The pipe will uncompress the data from the writer. +pr, pw := io.Pipe() +zw := lz4.NewWriter(pw) +zr := lz4.NewReader(pr) + +go func() { + // Compress the input string. + _, _ = io.Copy(zw, r) + _ = zw.Close() // Make sure the writer is closed + _ = pw.Close() // Terminate the pipe +}() + +_, _ = io.Copy(os.Stdout, zr) + +// Output: +// hello world +``` + +## Contributing + +Contributions are very welcome for bug fixing, performance improvements...! + +- Open an issue with a proper description +- Send a pull request with appropriate test case(s) + +## Contributors + +Thanks to all [contributors](https://github.com/pierrec/lz4/graphs/contributors) so far! + +Special thanks to [@Zariel](https://github.com/Zariel) for his asm implementation of the decoder. + +Special thanks to [@klauspost](https://github.com/klauspost) for his work on optimizing the code. diff --git a/vendor/github.com/pierrec/lz4/block.go b/vendor/github.com/pierrec/lz4/block.go new file mode 100644 index 000000000..b589af467 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/block.go @@ -0,0 +1,387 @@ +package lz4 + +import ( + "encoding/binary" + "fmt" + "math/bits" +) + +// blockHash hashes the lower 6 bytes into a value < htSize. +func blockHash(x uint64) uint32 { + const prime6bytes = 227718039650203 + return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog)) +} + +// CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible. +func CompressBlockBound(n int) int { + return n + n/255 + 16 +} + +// UncompressBlock uncompresses the source buffer into the destination one, +// and returns the uncompressed size. +// +// The destination buffer must be sized appropriately. +// +// An error is returned if the source data is invalid or the destination buffer is too small. +func UncompressBlock(src, dst []byte) (int, error) { + if len(src) == 0 { + return 0, nil + } + if di := decodeBlock(dst, src); di >= 0 { + return di, nil + } + return 0, ErrInvalidSourceShortBuffer +} + +// CompressBlock compresses the source buffer into the destination one. +// This is the fast version of LZ4 compression and also the default one. +// The size of hashTable must be at least 64Kb. +// +// The size of the compressed data is returned. If it is 0 and no error, then the data is incompressible. +// +// An error is returned if the destination buffer is too small. +func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) { + if len(hashTable) < htSize { + return 0, fmt.Errorf("hash table too small, should be at least %d in size", htSize) + } + defer recoverBlock(&err) + + // adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible. + // This significantly speeds up incompressible data and usually has very small impact on compresssion. + // bytes to skip = 1 + (bytes since last match >> adaptSkipLog) + const adaptSkipLog = 7 + sn, dn := len(src)-mfLimit, len(dst) + if sn <= 0 || dn == 0 { + return 0, nil + } + // Prove to the compiler the table has at least htSize elements. + // The compiler can see that "uint32() >> hashShift" cannot be out of bounds. + hashTable = hashTable[:htSize] + + // si: Current position of the search. + // anchor: Position of the current literals. + var si, di, anchor int + + // Fast scan strategy: the hash table only stores the last 4 bytes sequences. + for si < sn { + // Hash the next 6 bytes (sequence)... + match := binary.LittleEndian.Uint64(src[si:]) + h := blockHash(match) + h2 := blockHash(match >> 8) + + // We check a match at s, s+1 and s+2 and pick the first one we get. + // Checking 3 only requires us to load the source one. + ref := hashTable[h] + ref2 := hashTable[h2] + hashTable[h] = si + hashTable[h2] = si + 1 + offset := si - ref + + // If offset <= 0 we got an old entry in the hash table. + if offset <= 0 || offset >= winSize || // Out of window. + uint32(match) != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches. + // No match. Start calculating another hash. + // The processor can usually do this out-of-order. + h = blockHash(match >> 16) + ref = hashTable[h] + + // Check the second match at si+1 + si += 1 + offset = si - ref2 + + if offset <= 0 || offset >= winSize || + uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) { + // No match. Check the third match at si+2 + si += 1 + offset = si - ref + hashTable[h] = si + + if offset <= 0 || offset >= winSize || + uint32(match>>16) != binary.LittleEndian.Uint32(src[ref:]) { + // Skip one extra byte (at si+3) before we check 3 matches again. + si += 2 + (si-anchor)>>adaptSkipLog + continue + } + } + } + + // Match found. + lLen := si - anchor // Literal length. + // We already matched 4 bytes. + mLen := 4 + + // Extend backwards if we can, reducing literals. + tOff := si - offset - 1 + for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] { + si-- + tOff-- + lLen-- + mLen++ + } + + // Add the match length, so we continue search at the end. + // Use mLen to store the offset base. + si, mLen = si+mLen, si+minMatch + + // Find the longest match by looking by batches of 8 bytes. + for si+8 < sn { + x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:]) + if x == 0 { + si += 8 + } else { + // Stop is first non-zero byte. + si += bits.TrailingZeros64(x) >> 3 + break + } + } + + mLen = si - mLen + if mLen < 0xF { + dst[di] = byte(mLen) + } else { + dst[di] = 0xF + } + + // Encode literals length. + if lLen < 0xF { + dst[di] |= byte(lLen << 4) + } else { + dst[di] |= 0xF0 + di++ + l := lLen - 0xF + for ; l >= 0xFF; l -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(l) + } + di++ + + // Literals. + copy(dst[di:di+lLen], src[anchor:anchor+lLen]) + di += lLen + 2 + anchor = si + + // Encode offset. + _ = dst[di] // Bound check elimination. + dst[di-2], dst[di-1] = byte(offset), byte(offset>>8) + + // Encode match length part 2. + if mLen >= 0xF { + for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(mLen) + di++ + } + // Check if we can load next values. + if si >= sn { + break + } + // Hash match end-2 + h = blockHash(binary.LittleEndian.Uint64(src[si-2:])) + hashTable[h] = si - 2 + } + + if anchor == 0 { + // Incompressible. + return 0, nil + } + + // Last literals. + lLen := len(src) - anchor + if lLen < 0xF { + dst[di] = byte(lLen << 4) + } else { + dst[di] = 0xF0 + di++ + for lLen -= 0xF; lLen >= 0xFF; lLen -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(lLen) + } + di++ + + // Write the last literals. + if di >= anchor { + // Incompressible. + return 0, nil + } + di += copy(dst[di:di+len(src)-anchor], src[anchor:]) + return di, nil +} + +// blockHash hashes 4 bytes into a value < winSize. +func blockHashHC(x uint32) uint32 { + const hasher uint32 = 2654435761 // Knuth multiplicative hash. + return x * hasher >> (32 - winSizeLog) +} + +// CompressBlockHC compresses the source buffer src into the destination dst +// with max search depth (use 0 or negative value for no max). +// +// CompressBlockHC compression ratio is better than CompressBlock but it is also slower. +// +// The size of the compressed data is returned. If it is 0 and no error, then the data is not compressible. +// +// An error is returned if the destination buffer is too small. +func CompressBlockHC(src, dst []byte, depth int) (_ int, err error) { + defer recoverBlock(&err) + + // adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible. + // This significantly speeds up incompressible data and usually has very small impact on compresssion. + // bytes to skip = 1 + (bytes since last match >> adaptSkipLog) + const adaptSkipLog = 7 + + sn, dn := len(src)-mfLimit, len(dst) + if sn <= 0 || dn == 0 { + return 0, nil + } + var si, di int + + // hashTable: stores the last position found for a given hash + // chainTable: stores previous positions for a given hash + var hashTable, chainTable [winSize]int + + if depth <= 0 { + depth = winSize + } + + anchor := si + for si < sn { + // Hash the next 4 bytes (sequence). + match := binary.LittleEndian.Uint32(src[si:]) + h := blockHashHC(match) + + // Follow the chain until out of window and give the longest match. + mLen := 0 + offset := 0 + for next, try := hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next = chainTable[next&winMask] { + // The first (mLen==0) or next byte (mLen>=minMatch) at current match length + // must match to improve on the match length. + if src[next+mLen] != src[si+mLen] { + continue + } + ml := 0 + // Compare the current position with a previous with the same hash. + for ml < sn-si { + x := binary.LittleEndian.Uint64(src[next+ml:]) ^ binary.LittleEndian.Uint64(src[si+ml:]) + if x == 0 { + ml += 8 + } else { + // Stop is first non-zero byte. + ml += bits.TrailingZeros64(x) >> 3 + break + } + } + if ml < minMatch || ml <= mLen { + // Match too small (>adaptSkipLog + continue + } + + // Match found. + // Update hash/chain tables with overlapping bytes: + // si already hashed, add everything from si+1 up to the match length. + winStart := si + 1 + if ws := si + mLen - winSize; ws > winStart { + winStart = ws + } + for si, ml := winStart, si+mLen; si < ml; { + match >>= 8 + match |= uint32(src[si+3]) << 24 + h := blockHashHC(match) + chainTable[si&winMask] = hashTable[h] + hashTable[h] = si + si++ + } + + lLen := si - anchor + si += mLen + mLen -= minMatch // Match length does not include minMatch. + + if mLen < 0xF { + dst[di] = byte(mLen) + } else { + dst[di] = 0xF + } + + // Encode literals length. + if lLen < 0xF { + dst[di] |= byte(lLen << 4) + } else { + dst[di] |= 0xF0 + di++ + l := lLen - 0xF + for ; l >= 0xFF; l -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(l) + } + di++ + + // Literals. + copy(dst[di:di+lLen], src[anchor:anchor+lLen]) + di += lLen + anchor = si + + // Encode offset. + di += 2 + dst[di-2], dst[di-1] = byte(offset), byte(offset>>8) + + // Encode match length part 2. + if mLen >= 0xF { + for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(mLen) + di++ + } + } + + if anchor == 0 { + // Incompressible. + return 0, nil + } + + // Last literals. + lLen := len(src) - anchor + if lLen < 0xF { + dst[di] = byte(lLen << 4) + } else { + dst[di] = 0xF0 + di++ + lLen -= 0xF + for ; lLen >= 0xFF; lLen -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(lLen) + } + di++ + + // Write the last literals. + if di >= anchor { + // Incompressible. + return 0, nil + } + di += copy(dst[di:di+len(src)-anchor], src[anchor:]) + return di, nil +} diff --git a/vendor/github.com/pierrec/lz4/debug.go b/vendor/github.com/pierrec/lz4/debug.go new file mode 100644 index 000000000..bc5e78d40 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/debug.go @@ -0,0 +1,23 @@ +// +build lz4debug + +package lz4 + +import ( + "fmt" + "os" + "path/filepath" + "runtime" +) + +const debugFlag = true + +func debug(args ...interface{}) { + _, file, line, _ := runtime.Caller(1) + file = filepath.Base(file) + + f := fmt.Sprintf("LZ4: %s:%d %s", file, line, args[0]) + if f[len(f)-1] != '\n' { + f += "\n" + } + fmt.Fprintf(os.Stderr, f, args[1:]...) +} diff --git a/vendor/github.com/pierrec/lz4/debug_stub.go b/vendor/github.com/pierrec/lz4/debug_stub.go new file mode 100644 index 000000000..44211ad96 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/debug_stub.go @@ -0,0 +1,7 @@ +// +build !lz4debug + +package lz4 + +const debugFlag = false + +func debug(args ...interface{}) {} diff --git a/vendor/github.com/pierrec/lz4/decode_amd64.go b/vendor/github.com/pierrec/lz4/decode_amd64.go new file mode 100644 index 000000000..43cc14fbe --- /dev/null +++ b/vendor/github.com/pierrec/lz4/decode_amd64.go @@ -0,0 +1,8 @@ +// +build !appengine +// +build gc +// +build !noasm + +package lz4 + +//go:noescape +func decodeBlock(dst, src []byte) int diff --git a/vendor/github.com/pierrec/lz4/decode_amd64.s b/vendor/github.com/pierrec/lz4/decode_amd64.s new file mode 100644 index 000000000..20fef3975 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/decode_amd64.s @@ -0,0 +1,375 @@ +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// AX scratch +// BX scratch +// CX scratch +// DX token +// +// DI &dst +// SI &src +// R8 &dst + len(dst) +// R9 &src + len(src) +// R11 &dst +// R12 short output end +// R13 short input end +// func decodeBlock(dst, src []byte) int +// using 50 bytes of stack currently +TEXT ·decodeBlock(SB), NOSPLIT, $64-56 + MOVQ dst_base+0(FP), DI + MOVQ DI, R11 + MOVQ dst_len+8(FP), R8 + ADDQ DI, R8 + + MOVQ src_base+24(FP), SI + MOVQ src_len+32(FP), R9 + ADDQ SI, R9 + + // shortcut ends + // short output end + MOVQ R8, R12 + SUBQ $32, R12 + // short input end + MOVQ R9, R13 + SUBQ $16, R13 + +loop: + // for si < len(src) + CMPQ SI, R9 + JGE end + + // token := uint32(src[si]) + MOVBQZX (SI), DX + INCQ SI + + // lit_len = token >> 4 + // if lit_len > 0 + // CX = lit_len + MOVQ DX, CX + SHRQ $4, CX + + // if lit_len != 0xF + CMPQ CX, $0xF + JEQ lit_len_loop_pre + CMPQ DI, R12 + JGE lit_len_loop_pre + CMPQ SI, R13 + JGE lit_len_loop_pre + + // copy shortcut + + // A two-stage shortcut for the most common case: + // 1) If the literal length is 0..14, and there is enough space, + // enter the shortcut and copy 16 bytes on behalf of the literals + // (in the fast mode, only 8 bytes can be safely copied this way). + // 2) Further if the match length is 4..18, copy 18 bytes in a similar + // manner; but we ensure that there's enough space in the output for + // those 18 bytes earlier, upon entering the shortcut (in other words, + // there is a combined check for both stages). + + // copy literal + MOVOU (SI), X0 + MOVOU X0, (DI) + ADDQ CX, DI + ADDQ CX, SI + + MOVQ DX, CX + ANDQ $0xF, CX + + // The second stage: prepare for match copying, decode full info. + // If it doesn't work out, the info won't be wasted. + // offset := uint16(data[:2]) + MOVWQZX (SI), DX + ADDQ $2, SI + + MOVQ DI, AX + SUBQ DX, AX + CMPQ AX, DI + JGT err_short_buf + + // if we can't do the second stage then jump straight to read the + // match length, we already have the offset. + CMPQ CX, $0xF + JEQ match_len_loop_pre + CMPQ DX, $8 + JLT match_len_loop_pre + CMPQ AX, R11 + JLT err_short_buf + + // memcpy(op + 0, match + 0, 8); + MOVQ (AX), BX + MOVQ BX, (DI) + // memcpy(op + 8, match + 8, 8); + MOVQ 8(AX), BX + MOVQ BX, 8(DI) + // memcpy(op +16, match +16, 2); + MOVW 16(AX), BX + MOVW BX, 16(DI) + + ADDQ $4, DI // minmatch + ADDQ CX, DI + + // shortcut complete, load next token + JMP loop + +lit_len_loop_pre: + // if lit_len > 0 + CMPQ CX, $0 + JEQ offset + CMPQ CX, $0xF + JNE copy_literal + +lit_len_loop: + // for src[si] == 0xFF + CMPB (SI), $0xFF + JNE lit_len_finalise + + // bounds check src[si+1] + MOVQ SI, AX + ADDQ $1, AX + CMPQ AX, R9 + JGT err_short_buf + + // lit_len += 0xFF + ADDQ $0xFF, CX + INCQ SI + JMP lit_len_loop + +lit_len_finalise: + // lit_len += int(src[si]) + // si++ + MOVBQZX (SI), AX + ADDQ AX, CX + INCQ SI + +copy_literal: + // bounds check src and dst + MOVQ SI, AX + ADDQ CX, AX + CMPQ AX, R9 + JGT err_short_buf + + MOVQ DI, AX + ADDQ CX, AX + CMPQ AX, R8 + JGT err_short_buf + + // whats a good cut off to call memmove? + CMPQ CX, $16 + JGT memmove_lit + + // if len(dst[di:]) < 16 + MOVQ R8, AX + SUBQ DI, AX + CMPQ AX, $16 + JLT memmove_lit + + // if len(src[si:]) < 16 + MOVQ R9, AX + SUBQ SI, AX + CMPQ AX, $16 + JLT memmove_lit + + MOVOU (SI), X0 + MOVOU X0, (DI) + + JMP finish_lit_copy + +memmove_lit: + // memmove(to, from, len) + MOVQ DI, 0(SP) + MOVQ SI, 8(SP) + MOVQ CX, 16(SP) + // spill + MOVQ DI, 24(SP) + MOVQ SI, 32(SP) + MOVQ CX, 40(SP) // need len to inc SI, DI after + MOVB DX, 48(SP) + CALL runtime·memmove(SB) + + // restore registers + MOVQ 24(SP), DI + MOVQ 32(SP), SI + MOVQ 40(SP), CX + MOVB 48(SP), DX + + // recalc initial values + MOVQ dst_base+0(FP), R8 + MOVQ R8, R11 + ADDQ dst_len+8(FP), R8 + MOVQ src_base+24(FP), R9 + ADDQ src_len+32(FP), R9 + MOVQ R8, R12 + SUBQ $32, R12 + MOVQ R9, R13 + SUBQ $16, R13 + +finish_lit_copy: + ADDQ CX, SI + ADDQ CX, DI + + CMPQ SI, R9 + JGE end + +offset: + // CX := mLen + // free up DX to use for offset + MOVQ DX, CX + + MOVQ SI, AX + ADDQ $2, AX + CMPQ AX, R9 + JGT err_short_buf + + // offset + // DX := int(src[si]) | int(src[si+1])<<8 + MOVWQZX (SI), DX + ADDQ $2, SI + + // 0 offset is invalid + CMPQ DX, $0 + JEQ err_corrupt + + ANDB $0xF, CX + +match_len_loop_pre: + // if mlen != 0xF + CMPB CX, $0xF + JNE copy_match + +match_len_loop: + // for src[si] == 0xFF + // lit_len += 0xFF + CMPB (SI), $0xFF + JNE match_len_finalise + + // bounds check src[si+1] + MOVQ SI, AX + ADDQ $1, AX + CMPQ AX, R9 + JGT err_short_buf + + ADDQ $0xFF, CX + INCQ SI + JMP match_len_loop + +match_len_finalise: + // lit_len += int(src[si]) + // si++ + MOVBQZX (SI), AX + ADDQ AX, CX + INCQ SI + +copy_match: + // mLen += minMatch + ADDQ $4, CX + + // check we have match_len bytes left in dst + // di+match_len < len(dst) + MOVQ DI, AX + ADDQ CX, AX + CMPQ AX, R8 + JGT err_short_buf + + // DX = offset + // CX = match_len + // BX = &dst + (di - offset) + MOVQ DI, BX + SUBQ DX, BX + + // check BX is within dst + // if BX < &dst + CMPQ BX, R11 + JLT err_short_buf + + // if offset + match_len < di + MOVQ BX, AX + ADDQ CX, AX + CMPQ DI, AX + JGT copy_interior_match + + // AX := len(dst[:di]) + // MOVQ DI, AX + // SUBQ R11, AX + + // copy 16 bytes at a time + // if di-offset < 16 copy 16-(di-offset) bytes to di + // then do the remaining + +copy_match_loop: + // for match_len >= 0 + // dst[di] = dst[i] + // di++ + // i++ + MOVB (BX), AX + MOVB AX, (DI) + INCQ DI + INCQ BX + DECQ CX + + CMPQ CX, $0 + JGT copy_match_loop + + JMP loop + +copy_interior_match: + CMPQ CX, $16 + JGT memmove_match + + // if len(dst[di:]) < 16 + MOVQ R8, AX + SUBQ DI, AX + CMPQ AX, $16 + JLT memmove_match + + MOVOU (BX), X0 + MOVOU X0, (DI) + + ADDQ CX, DI + JMP loop + +memmove_match: + // memmove(to, from, len) + MOVQ DI, 0(SP) + MOVQ BX, 8(SP) + MOVQ CX, 16(SP) + // spill + MOVQ DI, 24(SP) + MOVQ SI, 32(SP) + MOVQ CX, 40(SP) // need len to inc SI, DI after + CALL runtime·memmove(SB) + + // restore registers + MOVQ 24(SP), DI + MOVQ 32(SP), SI + MOVQ 40(SP), CX + + // recalc initial values + MOVQ dst_base+0(FP), R8 + MOVQ R8, R11 // TODO: make these sensible numbers + ADDQ dst_len+8(FP), R8 + MOVQ src_base+24(FP), R9 + ADDQ src_len+32(FP), R9 + MOVQ R8, R12 + SUBQ $32, R12 + MOVQ R9, R13 + SUBQ $16, R13 + + ADDQ CX, DI + JMP loop + +err_corrupt: + MOVQ $-1, ret+48(FP) + RET + +err_short_buf: + MOVQ $-2, ret+48(FP) + RET + +end: + SUBQ R11, DI + MOVQ DI, ret+48(FP) + RET diff --git a/vendor/github.com/pierrec/lz4/decode_other.go b/vendor/github.com/pierrec/lz4/decode_other.go new file mode 100644 index 000000000..919888edf --- /dev/null +++ b/vendor/github.com/pierrec/lz4/decode_other.go @@ -0,0 +1,98 @@ +// +build !amd64 appengine !gc noasm + +package lz4 + +func decodeBlock(dst, src []byte) (ret int) { + const hasError = -2 + defer func() { + if recover() != nil { + ret = hasError + } + }() + + var si, di int + for { + // Literals and match lengths (token). + b := int(src[si]) + si++ + + // Literals. + if lLen := b >> 4; lLen > 0 { + switch { + case lLen < 0xF && si+16 < len(src): + // Shortcut 1 + // if we have enough room in src and dst, and the literals length + // is small enough (0..14) then copy all 16 bytes, even if not all + // are part of the literals. + copy(dst[di:], src[si:si+16]) + si += lLen + di += lLen + if mLen := b & 0xF; mLen < 0xF { + // Shortcut 2 + // if the match length (4..18) fits within the literals, then copy + // all 18 bytes, even if not all are part of the literals. + mLen += 4 + if offset := int(src[si]) | int(src[si+1])<<8; mLen <= offset { + i := di - offset + end := i + 18 + if end > len(dst) { + // The remaining buffer may not hold 18 bytes. + // See https://github.com/pierrec/lz4/issues/51. + end = len(dst) + } + copy(dst[di:], dst[i:end]) + si += 2 + di += mLen + continue + } + } + case lLen == 0xF: + for src[si] == 0xFF { + lLen += 0xFF + si++ + } + lLen += int(src[si]) + si++ + fallthrough + default: + copy(dst[di:di+lLen], src[si:si+lLen]) + si += lLen + di += lLen + } + } + if si >= len(src) { + return di + } + + offset := int(src[si]) | int(src[si+1])<<8 + if offset == 0 { + return hasError + } + si += 2 + + // Match. + mLen := b & 0xF + if mLen == 0xF { + for src[si] == 0xFF { + mLen += 0xFF + si++ + } + mLen += int(src[si]) + si++ + } + mLen += minMatch + + // Copy the match. + expanded := dst[di-offset:] + if mLen > offset { + // Efficiently copy the match dst[di-offset:di] into the dst slice. + bytesToCopy := offset * (mLen / offset) + for n := offset; n <= bytesToCopy+offset; n *= 2 { + copy(expanded[n:], expanded[:n]) + } + di += bytesToCopy + mLen -= bytesToCopy + } + di += copy(dst[di:di+mLen], expanded[:mLen]) + } +} diff --git a/vendor/github.com/pierrec/lz4/errors.go b/vendor/github.com/pierrec/lz4/errors.go new file mode 100644 index 000000000..1c45d1813 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/errors.go @@ -0,0 +1,30 @@ +package lz4 + +import ( + "errors" + "fmt" + "os" + rdebug "runtime/debug" +) + +var ( + // ErrInvalidSourceShortBuffer is returned by UncompressBlock or CompressBLock when a compressed + // block is corrupted or the destination buffer is not large enough for the uncompressed data. + ErrInvalidSourceShortBuffer = errors.New("lz4: invalid source or destination buffer too short") + // ErrInvalid is returned when reading an invalid LZ4 archive. + ErrInvalid = errors.New("lz4: bad magic number") + // ErrBlockDependency is returned when attempting to decompress an archive created with block dependency. + ErrBlockDependency = errors.New("lz4: block dependency not supported") + // ErrUnsupportedSeek is returned when attempting to Seek any way but forward from the current position. + ErrUnsupportedSeek = errors.New("lz4: can only seek forward from io.SeekCurrent") +) + +func recoverBlock(e *error) { + if r := recover(); r != nil && *e == nil { + if debugFlag { + fmt.Fprintln(os.Stderr, r) + rdebug.PrintStack() + } + *e = ErrInvalidSourceShortBuffer + } +} diff --git a/vendor/github.com/pierrec/lz4/internal/xxh32/xxh32zero.go b/vendor/github.com/pierrec/lz4/internal/xxh32/xxh32zero.go new file mode 100644 index 000000000..7a76a6bce --- /dev/null +++ b/vendor/github.com/pierrec/lz4/internal/xxh32/xxh32zero.go @@ -0,0 +1,223 @@ +// Package xxh32 implements the very fast XXH hashing algorithm (32 bits version). +// (https://github.com/Cyan4973/XXH/) +package xxh32 + +import ( + "encoding/binary" +) + +const ( + prime1 uint32 = 2654435761 + prime2 uint32 = 2246822519 + prime3 uint32 = 3266489917 + prime4 uint32 = 668265263 + prime5 uint32 = 374761393 + + primeMask = 0xFFFFFFFF + prime1plus2 = uint32((uint64(prime1) + uint64(prime2)) & primeMask) // 606290984 + prime1minus = uint32((-int64(prime1)) & primeMask) // 1640531535 +) + +// XXHZero represents an xxhash32 object with seed 0. +type XXHZero struct { + v1 uint32 + v2 uint32 + v3 uint32 + v4 uint32 + totalLen uint64 + buf [16]byte + bufused int +} + +// Sum appends the current hash to b and returns the resulting slice. +// It does not change the underlying hash state. +func (xxh XXHZero) Sum(b []byte) []byte { + h32 := xxh.Sum32() + return append(b, byte(h32), byte(h32>>8), byte(h32>>16), byte(h32>>24)) +} + +// Reset resets the Hash to its initial state. +func (xxh *XXHZero) Reset() { + xxh.v1 = prime1plus2 + xxh.v2 = prime2 + xxh.v3 = 0 + xxh.v4 = prime1minus + xxh.totalLen = 0 + xxh.bufused = 0 +} + +// Size returns the number of bytes returned by Sum(). +func (xxh *XXHZero) Size() int { + return 4 +} + +// BlockSize gives the minimum number of bytes accepted by Write(). +func (xxh *XXHZero) BlockSize() int { + return 1 +} + +// Write adds input bytes to the Hash. +// It never returns an error. +func (xxh *XXHZero) Write(input []byte) (int, error) { + if xxh.totalLen == 0 { + xxh.Reset() + } + n := len(input) + m := xxh.bufused + + xxh.totalLen += uint64(n) + + r := len(xxh.buf) - m + if n < r { + copy(xxh.buf[m:], input) + xxh.bufused += len(input) + return n, nil + } + + p := 0 + // Causes compiler to work directly from registers instead of stack: + v1, v2, v3, v4 := xxh.v1, xxh.v2, xxh.v3, xxh.v4 + if m > 0 { + // some data left from previous update + copy(xxh.buf[xxh.bufused:], input[:r]) + xxh.bufused += len(input) - r + + // fast rotl(13) + buf := xxh.buf[:16] // BCE hint. + v1 = rol13(v1+binary.LittleEndian.Uint32(buf[:])*prime2) * prime1 + v2 = rol13(v2+binary.LittleEndian.Uint32(buf[4:])*prime2) * prime1 + v3 = rol13(v3+binary.LittleEndian.Uint32(buf[8:])*prime2) * prime1 + v4 = rol13(v4+binary.LittleEndian.Uint32(buf[12:])*prime2) * prime1 + p = r + xxh.bufused = 0 + } + + for n := n - 16; p <= n; p += 16 { + sub := input[p:][:16] //BCE hint for compiler + v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1 + v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1 + v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1 + v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1 + } + xxh.v1, xxh.v2, xxh.v3, xxh.v4 = v1, v2, v3, v4 + + copy(xxh.buf[xxh.bufused:], input[p:]) + xxh.bufused += len(input) - p + + return n, nil +} + +// Sum32 returns the 32 bits Hash value. +func (xxh *XXHZero) Sum32() uint32 { + h32 := uint32(xxh.totalLen) + if h32 >= 16 { + h32 += rol1(xxh.v1) + rol7(xxh.v2) + rol12(xxh.v3) + rol18(xxh.v4) + } else { + h32 += prime5 + } + + p := 0 + n := xxh.bufused + buf := xxh.buf + for n := n - 4; p <= n; p += 4 { + h32 += binary.LittleEndian.Uint32(buf[p:p+4]) * prime3 + h32 = rol17(h32) * prime4 + } + for ; p < n; p++ { + h32 += uint32(buf[p]) * prime5 + h32 = rol11(h32) * prime1 + } + + h32 ^= h32 >> 15 + h32 *= prime2 + h32 ^= h32 >> 13 + h32 *= prime3 + h32 ^= h32 >> 16 + + return h32 +} + +// ChecksumZero returns the 32bits Hash value. +func ChecksumZero(input []byte) uint32 { + n := len(input) + h32 := uint32(n) + + if n < 16 { + h32 += prime5 + } else { + v1 := prime1plus2 + v2 := prime2 + v3 := uint32(0) + v4 := prime1minus + p := 0 + for n := n - 16; p <= n; p += 16 { + sub := input[p:][:16] //BCE hint for compiler + v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1 + v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1 + v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1 + v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1 + } + input = input[p:] + n -= p + h32 += rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) + } + + p := 0 + for n := n - 4; p <= n; p += 4 { + h32 += binary.LittleEndian.Uint32(input[p:p+4]) * prime3 + h32 = rol17(h32) * prime4 + } + for p < n { + h32 += uint32(input[p]) * prime5 + h32 = rol11(h32) * prime1 + p++ + } + + h32 ^= h32 >> 15 + h32 *= prime2 + h32 ^= h32 >> 13 + h32 *= prime3 + h32 ^= h32 >> 16 + + return h32 +} + +// Uint32Zero hashes x with seed 0. +func Uint32Zero(x uint32) uint32 { + h := prime5 + 4 + x*prime3 + h = rol17(h) * prime4 + h ^= h >> 15 + h *= prime2 + h ^= h >> 13 + h *= prime3 + h ^= h >> 16 + return h +} + +func rol1(u uint32) uint32 { + return u<<1 | u>>31 +} + +func rol7(u uint32) uint32 { + return u<<7 | u>>25 +} + +func rol11(u uint32) uint32 { + return u<<11 | u>>21 +} + +func rol12(u uint32) uint32 { + return u<<12 | u>>20 +} + +func rol13(u uint32) uint32 { + return u<<13 | u>>19 +} + +func rol17(u uint32) uint32 { + return u<<17 | u>>15 +} + +func rol18(u uint32) uint32 { + return u<<18 | u>>14 +} diff --git a/vendor/github.com/pierrec/lz4/lz4.go b/vendor/github.com/pierrec/lz4/lz4.go new file mode 100644 index 000000000..29864d8fd --- /dev/null +++ b/vendor/github.com/pierrec/lz4/lz4.go @@ -0,0 +1,113 @@ +// Package lz4 implements reading and writing lz4 compressed data (a frame), +// as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html. +// +// Although the block level compression and decompression functions are exposed and are fully compatible +// with the lz4 block format definition, they are low level and should not be used directly. +// For a complete description of an lz4 compressed block, see: +// http://fastcompression.blogspot.fr/2011/05/lz4-explained.html +// +// See https://github.com/Cyan4973/lz4 for the reference C implementation. +// +package lz4 + +import "math/bits" + +import "sync" + +const ( + // Extension is the LZ4 frame file name extension + Extension = ".lz4" + // Version is the LZ4 frame format version + Version = 1 + + frameMagic uint32 = 0x184D2204 + frameSkipMagic uint32 = 0x184D2A50 + + // The following constants are used to setup the compression algorithm. + minMatch = 4 // the minimum size of the match sequence size (4 bytes) + winSizeLog = 16 // LZ4 64Kb window size limit + winSize = 1 << winSizeLog + winMask = winSize - 1 // 64Kb window of previous data for dependent blocks + compressedBlockFlag = 1 << 31 + compressedBlockMask = compressedBlockFlag - 1 + + // hashLog determines the size of the hash table used to quickly find a previous match position. + // Its value influences the compression speed and memory usage, the lower the faster, + // but at the expense of the compression ratio. + // 16 seems to be the best compromise for fast compression. + hashLog = 16 + htSize = 1 << hashLog + + mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes. +) + +// map the block max size id with its value in bytes: 64Kb, 256Kb, 1Mb and 4Mb. +const ( + blockSize64K = 1 << (16 + 2*iota) + blockSize256K + blockSize1M + blockSize4M +) + +var ( + // Keep a pool of buffers for each valid block sizes. + bsMapValue = [...]*sync.Pool{ + newBufferPool(2 * blockSize64K), + newBufferPool(2 * blockSize256K), + newBufferPool(2 * blockSize1M), + newBufferPool(2 * blockSize4M), + } +) + +// newBufferPool returns a pool for buffers of the given size. +func newBufferPool(size int) *sync.Pool { + return &sync.Pool{ + New: func() interface{} { + return make([]byte, size) + }, + } +} + +// getBuffer returns a buffer to its pool. +func getBuffer(size int) []byte { + idx := blockSizeValueToIndex(size) - 4 + return bsMapValue[idx].Get().([]byte) +} + +// putBuffer returns a buffer to its pool. +func putBuffer(size int, buf []byte) { + if cap(buf) > 0 { + idx := blockSizeValueToIndex(size) - 4 + bsMapValue[idx].Put(buf[:cap(buf)]) + } +} +func blockSizeIndexToValue(i byte) int { + return 1 << (16 + 2*uint(i)) +} +func isValidBlockSize(size int) bool { + const blockSizeMask = blockSize64K | blockSize256K | blockSize1M | blockSize4M + + return size&blockSizeMask > 0 && bits.OnesCount(uint(size)) == 1 +} +func blockSizeValueToIndex(size int) byte { + return 4 + byte(bits.TrailingZeros(uint(size)>>16)/2) +} + +// Header describes the various flags that can be set on a Writer or obtained from a Reader. +// The default values match those of the LZ4 frame format definition +// (http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html). +// +// NB. in a Reader, in case of concatenated frames, the Header values may change between Read() calls. +// It is the caller responsibility to check them if necessary. +type Header struct { + BlockChecksum bool // Compressed blocks checksum flag. + NoChecksum bool // Frame checksum flag. + BlockMaxSize int // Size of the uncompressed data block (one of [64KB, 256KB, 1MB, 4MB]). Default=4MB. + Size uint64 // Frame total size. It is _not_ computed by the Writer. + CompressionLevel int // Compression level (higher is better, use 0 for fastest compression). + done bool // Header processed flag (Read or Write and checked). +} + +func (h *Header) Reset() { + h.done = false +} diff --git a/vendor/github.com/pierrec/lz4/lz4_go1.10.go b/vendor/github.com/pierrec/lz4/lz4_go1.10.go new file mode 100644 index 000000000..9a0fb0070 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/lz4_go1.10.go @@ -0,0 +1,29 @@ +//+build go1.10 + +package lz4 + +import ( + "fmt" + "strings" +) + +func (h Header) String() string { + var s strings.Builder + + s.WriteString(fmt.Sprintf("%T{", h)) + if h.BlockChecksum { + s.WriteString("BlockChecksum: true ") + } + if h.NoChecksum { + s.WriteString("NoChecksum: true ") + } + if bs := h.BlockMaxSize; bs != 0 && bs != 4<<20 { + s.WriteString(fmt.Sprintf("BlockMaxSize: %d ", bs)) + } + if l := h.CompressionLevel; l != 0 { + s.WriteString(fmt.Sprintf("CompressionLevel: %d ", l)) + } + s.WriteByte('}') + + return s.String() +} diff --git a/vendor/github.com/pierrec/lz4/lz4_notgo1.10.go b/vendor/github.com/pierrec/lz4/lz4_notgo1.10.go new file mode 100644 index 000000000..12c761a2e --- /dev/null +++ b/vendor/github.com/pierrec/lz4/lz4_notgo1.10.go @@ -0,0 +1,29 @@ +//+build !go1.10 + +package lz4 + +import ( + "bytes" + "fmt" +) + +func (h Header) String() string { + var s bytes.Buffer + + s.WriteString(fmt.Sprintf("%T{", h)) + if h.BlockChecksum { + s.WriteString("BlockChecksum: true ") + } + if h.NoChecksum { + s.WriteString("NoChecksum: true ") + } + if bs := h.BlockMaxSize; bs != 0 && bs != 4<<20 { + s.WriteString(fmt.Sprintf("BlockMaxSize: %d ", bs)) + } + if l := h.CompressionLevel; l != 0 { + s.WriteString(fmt.Sprintf("CompressionLevel: %d ", l)) + } + s.WriteByte('}') + + return s.String() +} diff --git a/vendor/github.com/pierrec/lz4/reader.go b/vendor/github.com/pierrec/lz4/reader.go new file mode 100644 index 000000000..87dd72bd0 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/reader.go @@ -0,0 +1,335 @@ +package lz4 + +import ( + "encoding/binary" + "fmt" + "io" + "io/ioutil" + + "github.com/pierrec/lz4/internal/xxh32" +) + +// Reader implements the LZ4 frame decoder. +// The Header is set after the first call to Read(). +// The Header may change between Read() calls in case of concatenated frames. +type Reader struct { + Header + // Handler called when a block has been successfully read. + // It provides the number of bytes read. + OnBlockDone func(size int) + + buf [8]byte // Scrap buffer. + pos int64 // Current position in src. + src io.Reader // Source. + zdata []byte // Compressed data. + data []byte // Uncompressed data. + idx int // Index of unread bytes into data. + checksum xxh32.XXHZero // Frame hash. + skip int64 // Bytes to skip before next read. + dpos int64 // Position in dest +} + +// NewReader returns a new LZ4 frame decoder. +// No access to the underlying io.Reader is performed. +func NewReader(src io.Reader) *Reader { + r := &Reader{src: src} + return r +} + +// readHeader checks the frame magic number and parses the frame descriptoz. +// Skippable frames are supported even as a first frame although the LZ4 +// specifications recommends skippable frames not to be used as first frames. +func (z *Reader) readHeader(first bool) error { + defer z.checksum.Reset() + + buf := z.buf[:] + for { + magic, err := z.readUint32() + if err != nil { + z.pos += 4 + if !first && err == io.ErrUnexpectedEOF { + return io.EOF + } + return err + } + if magic == frameMagic { + break + } + if magic>>8 != frameSkipMagic>>8 { + return ErrInvalid + } + skipSize, err := z.readUint32() + if err != nil { + return err + } + z.pos += 4 + m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize)) + if err != nil { + return err + } + z.pos += m + } + + // Header. + if _, err := io.ReadFull(z.src, buf[:2]); err != nil { + return err + } + z.pos += 8 + + b := buf[0] + if v := b >> 6; v != Version { + return fmt.Errorf("lz4: invalid version: got %d; expected %d", v, Version) + } + if b>>5&1 == 0 { + return ErrBlockDependency + } + z.BlockChecksum = b>>4&1 > 0 + frameSize := b>>3&1 > 0 + z.NoChecksum = b>>2&1 == 0 + + bmsID := buf[1] >> 4 & 0x7 + if bmsID < 4 || bmsID > 7 { + return fmt.Errorf("lz4: invalid block max size ID: %d", bmsID) + } + bSize := blockSizeIndexToValue(bmsID - 4) + z.BlockMaxSize = bSize + + // Allocate the compressed/uncompressed buffers. + // The compressed buffer cannot exceed the uncompressed one. + if n := 2 * bSize; cap(z.zdata) < n { + z.zdata = make([]byte, n, n) + } + if debugFlag { + debug("header block max size id=%d size=%d", bmsID, bSize) + } + z.zdata = z.zdata[:bSize] + z.data = z.zdata[:cap(z.zdata)][bSize:] + z.idx = len(z.data) + + _, _ = z.checksum.Write(buf[0:2]) + + if frameSize { + buf := buf[:8] + if _, err := io.ReadFull(z.src, buf); err != nil { + return err + } + z.Size = binary.LittleEndian.Uint64(buf) + z.pos += 8 + _, _ = z.checksum.Write(buf) + } + + // Header checksum. + if _, err := io.ReadFull(z.src, buf[:1]); err != nil { + return err + } + z.pos++ + if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] { + return fmt.Errorf("lz4: invalid header checksum: got %x; expected %x", buf[0], h) + } + + z.Header.done = true + if debugFlag { + debug("header read: %v", z.Header) + } + + return nil +} + +// Read decompresses data from the underlying source into the supplied buffer. +// +// Since there can be multiple streams concatenated, Header values may +// change between calls to Read(). If that is the case, no data is actually read from +// the underlying io.Reader, to allow for potential input buffer resizing. +func (z *Reader) Read(buf []byte) (int, error) { + if debugFlag { + debug("Read buf len=%d", len(buf)) + } + if !z.Header.done { + if err := z.readHeader(true); err != nil { + return 0, err + } + if debugFlag { + debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d", + len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx) + } + } + + if len(buf) == 0 { + return 0, nil + } + + if z.idx == len(z.data) { + // No data ready for reading, process the next block. + if debugFlag { + debug("reading block from writer") + } + // Reset uncompressed buffer + z.data = z.zdata[:cap(z.zdata)][len(z.zdata):] + + // Block length: 0 = end of frame, highest bit set: uncompressed. + bLen, err := z.readUint32() + if err != nil { + return 0, err + } + z.pos += 4 + + if bLen == 0 { + // End of frame reached. + if !z.NoChecksum { + // Validate the frame checksum. + checksum, err := z.readUint32() + if err != nil { + return 0, err + } + if debugFlag { + debug("frame checksum got=%x / want=%x", z.checksum.Sum32(), checksum) + } + z.pos += 4 + if h := z.checksum.Sum32(); checksum != h { + return 0, fmt.Errorf("lz4: invalid frame checksum: got %x; expected %x", h, checksum) + } + } + + // Get ready for the next concatenated frame and keep the position. + pos := z.pos + z.Reset(z.src) + z.pos = pos + + // Since multiple frames can be concatenated, check for more. + return 0, z.readHeader(false) + } + + if debugFlag { + debug("raw block size %d", bLen) + } + if bLen&compressedBlockFlag > 0 { + // Uncompressed block. + bLen &= compressedBlockMask + if debugFlag { + debug("uncompressed block size %d", bLen) + } + if int(bLen) > cap(z.data) { + return 0, fmt.Errorf("lz4: invalid block size: %d", bLen) + } + z.data = z.data[:bLen] + if _, err := io.ReadFull(z.src, z.data); err != nil { + return 0, err + } + z.pos += int64(bLen) + if z.OnBlockDone != nil { + z.OnBlockDone(int(bLen)) + } + + if z.BlockChecksum { + checksum, err := z.readUint32() + if err != nil { + return 0, err + } + z.pos += 4 + + if h := xxh32.ChecksumZero(z.data); h != checksum { + return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum) + } + } + + } else { + // Compressed block. + if debugFlag { + debug("compressed block size %d", bLen) + } + if int(bLen) > cap(z.data) { + return 0, fmt.Errorf("lz4: invalid block size: %d", bLen) + } + zdata := z.zdata[:bLen] + if _, err := io.ReadFull(z.src, zdata); err != nil { + return 0, err + } + z.pos += int64(bLen) + + if z.BlockChecksum { + checksum, err := z.readUint32() + if err != nil { + return 0, err + } + z.pos += 4 + + if h := xxh32.ChecksumZero(zdata); h != checksum { + return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum) + } + } + + n, err := UncompressBlock(zdata, z.data) + if err != nil { + return 0, err + } + z.data = z.data[:n] + if z.OnBlockDone != nil { + z.OnBlockDone(n) + } + } + + if !z.NoChecksum { + _, _ = z.checksum.Write(z.data) + if debugFlag { + debug("current frame checksum %x", z.checksum.Sum32()) + } + } + z.idx = 0 + } + + if z.skip > int64(len(z.data[z.idx:])) { + z.skip -= int64(len(z.data[z.idx:])) + z.dpos += int64(len(z.data[z.idx:])) + z.idx = len(z.data) + return 0, nil + } + + z.idx += int(z.skip) + z.dpos += z.skip + z.skip = 0 + + n := copy(buf, z.data[z.idx:]) + z.idx += n + z.dpos += int64(n) + if debugFlag { + debug("copied %d bytes to input", n) + } + + return n, nil +} + +// Seek implements io.Seeker, but supports seeking forward from the current +// position only. Any other seek will return an error. Allows skipping output +// bytes which aren't needed, which in some scenarios is faster than reading +// and discarding them. +// Note this may cause future calls to Read() to read 0 bytes if all of the +// data they would have returned is skipped. +func (z *Reader) Seek(offset int64, whence int) (int64, error) { + if offset < 0 || whence != io.SeekCurrent { + return z.dpos + z.skip, ErrUnsupportedSeek + } + z.skip += offset + return z.dpos + z.skip, nil +} + +// Reset discards the Reader's state and makes it equivalent to the +// result of its original state from NewReader, but reading from r instead. +// This permits reusing a Reader rather than allocating a new one. +func (z *Reader) Reset(r io.Reader) { + z.Header = Header{} + z.pos = 0 + z.src = r + z.zdata = z.zdata[:0] + z.data = z.data[:0] + z.idx = 0 + z.checksum.Reset() +} + +// readUint32 reads an uint32 into the supplied buffer. +// The idea is to make use of the already allocated buffers avoiding additional allocations. +func (z *Reader) readUint32() (uint32, error) { + buf := z.buf[:4] + _, err := io.ReadFull(z.src, buf) + x := binary.LittleEndian.Uint32(buf) + return x, err +} diff --git a/vendor/github.com/pierrec/lz4/writer.go b/vendor/github.com/pierrec/lz4/writer.go new file mode 100644 index 000000000..324f1386b --- /dev/null +++ b/vendor/github.com/pierrec/lz4/writer.go @@ -0,0 +1,408 @@ +package lz4 + +import ( + "encoding/binary" + "fmt" + "github.com/pierrec/lz4/internal/xxh32" + "io" + "runtime" +) + +// zResult contains the results of compressing a block. +type zResult struct { + size uint32 // Block header + data []byte // Compressed data + checksum uint32 // Data checksum +} + +// Writer implements the LZ4 frame encoder. +type Writer struct { + Header + // Handler called when a block has been successfully written out. + // It provides the number of bytes written. + OnBlockDone func(size int) + + buf [19]byte // magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes + dst io.Writer // Destination. + checksum xxh32.XXHZero // Frame checksum. + data []byte // Data to be compressed + buffer for compressed data. + idx int // Index into data. + hashtable [winSize]int // Hash table used in CompressBlock(). + + // For concurrency. + c chan chan zResult // Channel for block compression goroutines and writer goroutine. + err error // Any error encountered while writing to the underlying destination. +} + +// NewWriter returns a new LZ4 frame encoder. +// No access to the underlying io.Writer is performed. +// The supplied Header is checked at the first Write. +// It is ok to change it before the first Write but then not until a Reset() is performed. +func NewWriter(dst io.Writer) *Writer { + z := new(Writer) + z.Reset(dst) + return z +} + +// WithConcurrency sets the number of concurrent go routines used for compression. +// A negative value sets the concurrency to GOMAXPROCS. +func (z *Writer) WithConcurrency(n int) *Writer { + switch { + case n == 0 || n == 1: + z.c = nil + return z + case n < 0: + n = runtime.GOMAXPROCS(0) + } + z.c = make(chan chan zResult, n) + // Writer goroutine managing concurrent block compression goroutines. + go func() { + // Process next block compression item. + for c := range z.c { + // Read the next compressed block result. + // Waiting here ensures that the blocks are output in the order they were sent. + // The incoming channel is always closed as it indicates to the caller that + // the block has been processed. + res := <-c + n := len(res.data) + if n == 0 { + // Notify the block compression routine that we are done with its result. + // This is used when a sentinel block is sent to terminate the compression. + close(c) + return + } + // Write the block. + if err := z.writeUint32(res.size); err != nil && z.err == nil { + z.err = err + } + if _, err := z.dst.Write(res.data); err != nil && z.err == nil { + z.err = err + } + if z.BlockChecksum { + if err := z.writeUint32(res.checksum); err != nil && z.err == nil { + z.err = err + } + } + if isCompressed := res.size&compressedBlockFlag == 0; isCompressed { + // It is now safe to release the buffer as no longer in use by any goroutine. + putBuffer(cap(res.data), res.data) + } + if h := z.OnBlockDone; h != nil { + h(n) + } + close(c) + } + }() + return z +} + +// newBuffers instantiates new buffers which size matches the one in Header. +// The returned buffers are for decompression and compression respectively. +func (z *Writer) newBuffers() { + bSize := z.Header.BlockMaxSize + buf := getBuffer(bSize) + z.data = buf[:bSize] // Uncompressed buffer is the first half. +} + +// freeBuffers puts the writer's buffers back to the pool. +func (z *Writer) freeBuffers() { + // Put the buffer back into the pool, if any. + putBuffer(z.Header.BlockMaxSize, z.data) + z.data = nil +} + +// writeHeader builds and writes the header (magic+header) to the underlying io.Writer. +func (z *Writer) writeHeader() error { + // Default to 4Mb if BlockMaxSize is not set. + if z.Header.BlockMaxSize == 0 { + z.Header.BlockMaxSize = blockSize4M + } + // The only option that needs to be validated. + bSize := z.Header.BlockMaxSize + if !isValidBlockSize(z.Header.BlockMaxSize) { + return fmt.Errorf("lz4: invalid block max size: %d", bSize) + } + // Allocate the compressed/uncompressed buffers. + // The compressed buffer cannot exceed the uncompressed one. + z.newBuffers() + z.idx = 0 + + // Size is optional. + buf := z.buf[:] + + // Set the fixed size data: magic number, block max size and flags. + binary.LittleEndian.PutUint32(buf[0:], frameMagic) + flg := byte(Version << 6) + flg |= 1 << 5 // No block dependency. + if z.Header.BlockChecksum { + flg |= 1 << 4 + } + if z.Header.Size > 0 { + flg |= 1 << 3 + } + if !z.Header.NoChecksum { + flg |= 1 << 2 + } + buf[4] = flg + buf[5] = blockSizeValueToIndex(z.Header.BlockMaxSize) << 4 + + // Current buffer size: magic(4) + flags(1) + block max size (1). + n := 6 + // Optional items. + if z.Header.Size > 0 { + binary.LittleEndian.PutUint64(buf[n:], z.Header.Size) + n += 8 + } + + // The header checksum includes the flags, block max size and optional Size. + buf[n] = byte(xxh32.ChecksumZero(buf[4:n]) >> 8 & 0xFF) + z.checksum.Reset() + + // Header ready, write it out. + if _, err := z.dst.Write(buf[0 : n+1]); err != nil { + return err + } + z.Header.done = true + if debugFlag { + debug("wrote header %v", z.Header) + } + + return nil +} + +// Write compresses data from the supplied buffer into the underlying io.Writer. +// Write does not return until the data has been written. +func (z *Writer) Write(buf []byte) (int, error) { + if !z.Header.done { + if err := z.writeHeader(); err != nil { + return 0, err + } + } + if debugFlag { + debug("input buffer len=%d index=%d", len(buf), z.idx) + } + + zn := len(z.data) + var n int + for len(buf) > 0 { + if z.idx == 0 && len(buf) >= zn { + // Avoid a copy as there is enough data for a block. + if err := z.compressBlock(buf[:zn]); err != nil { + return n, err + } + n += zn + buf = buf[zn:] + continue + } + // Accumulate the data to be compressed. + m := copy(z.data[z.idx:], buf) + n += m + z.idx += m + buf = buf[m:] + if debugFlag { + debug("%d bytes copied to buf, current index %d", n, z.idx) + } + + if z.idx < len(z.data) { + // Buffer not filled. + if debugFlag { + debug("need more data for compression") + } + return n, nil + } + + // Buffer full. + if err := z.compressBlock(z.data); err != nil { + return n, err + } + z.idx = 0 + } + + return n, nil +} + +// compressBlock compresses a block. +func (z *Writer) compressBlock(data []byte) error { + if !z.NoChecksum { + _, _ = z.checksum.Write(data) + } + + if z.c != nil { + c := make(chan zResult) + z.c <- c // Send now to guarantee order + go writerCompressBlock(c, z.Header, data) + return nil + } + + zdata := z.data[z.Header.BlockMaxSize:cap(z.data)] + // The compressed block size cannot exceed the input's. + var zn int + + if level := z.Header.CompressionLevel; level != 0 { + zn, _ = CompressBlockHC(data, zdata, level) + } else { + zn, _ = CompressBlock(data, zdata, z.hashtable[:]) + } + + var bLen uint32 + if debugFlag { + debug("block compression %d => %d", len(data), zn) + } + if zn > 0 && zn < len(data) { + // Compressible and compressed size smaller than uncompressed: ok! + bLen = uint32(zn) + zdata = zdata[:zn] + } else { + // Uncompressed block. + bLen = uint32(len(data)) | compressedBlockFlag + zdata = data + } + if debugFlag { + debug("block compression to be written len=%d data len=%d", bLen, len(zdata)) + } + + // Write the block. + if err := z.writeUint32(bLen); err != nil { + return err + } + written, err := z.dst.Write(zdata) + if err != nil { + return err + } + if h := z.OnBlockDone; h != nil { + h(written) + } + + if !z.BlockChecksum { + if debugFlag { + debug("current frame checksum %x", z.checksum.Sum32()) + } + return nil + } + checksum := xxh32.ChecksumZero(zdata) + if debugFlag { + debug("block checksum %x", checksum) + defer func() { debug("current frame checksum %x", z.checksum.Sum32()) }() + } + return z.writeUint32(checksum) +} + +// Flush flushes any pending compressed data to the underlying writer. +// Flush does not return until the data has been written. +// If the underlying writer returns an error, Flush returns that error. +func (z *Writer) Flush() error { + if debugFlag { + debug("flush with index %d", z.idx) + } + if z.idx == 0 { + return nil + } + + data := z.data[:z.idx] + z.idx = 0 + if z.c == nil { + return z.compressBlock(data) + } + if !z.NoChecksum { + _, _ = z.checksum.Write(data) + } + c := make(chan zResult) + z.c <- c + writerCompressBlock(c, z.Header, data) + return nil +} + +func (z *Writer) close() error { + if z.c == nil { + return nil + } + // Send a sentinel block (no data to compress) to terminate the writer main goroutine. + c := make(chan zResult) + z.c <- c + c <- zResult{} + // Wait for the main goroutine to complete. + <-c + // At this point the main goroutine has shut down or is about to return. + z.c = nil + return z.err +} + +// Close closes the Writer, flushing any unwritten data to the underlying io.Writer, but does not close the underlying io.Writer. +func (z *Writer) Close() error { + if !z.Header.done { + if err := z.writeHeader(); err != nil { + return err + } + } + if err := z.Flush(); err != nil { + return err + } + if err := z.close(); err != nil { + return err + } + z.freeBuffers() + + if debugFlag { + debug("writing last empty block") + } + if err := z.writeUint32(0); err != nil { + return err + } + if z.NoChecksum { + return nil + } + checksum := z.checksum.Sum32() + if debugFlag { + debug("stream checksum %x", checksum) + } + return z.writeUint32(checksum) +} + +// Reset clears the state of the Writer z such that it is equivalent to its +// initial state from NewWriter, but instead writing to w. +// No access to the underlying io.Writer is performed. +func (z *Writer) Reset(w io.Writer) { + n := cap(z.c) + _ = z.close() + z.freeBuffers() + z.Header.Reset() + z.dst = w + z.checksum.Reset() + z.idx = 0 + z.err = nil + z.WithConcurrency(n) +} + +// writeUint32 writes a uint32 to the underlying writer. +func (z *Writer) writeUint32(x uint32) error { + buf := z.buf[:4] + binary.LittleEndian.PutUint32(buf, x) + _, err := z.dst.Write(buf) + return err +} + +// writerCompressBlock compresses data into a pooled buffer and writes its result +// out to the input channel. +func writerCompressBlock(c chan zResult, header Header, data []byte) { + zdata := getBuffer(header.BlockMaxSize) + // The compressed block size cannot exceed the input's. + var zn int + if level := header.CompressionLevel; level != 0 { + zn, _ = CompressBlockHC(data, zdata, level) + } else { + var hashTable [winSize]int + zn, _ = CompressBlock(data, zdata, hashTable[:]) + } + var res zResult + if zn > 0 && zn < len(data) { + res.size = uint32(zn) + res.data = zdata[:zn] + } else { + res.size = uint32(len(data)) | compressedBlockFlag + res.data = data + } + if header.BlockChecksum { + res.checksum = xxh32.ChecksumZero(res.data) + } + c <- res +} diff --git a/vendor/github.com/ulikunitz/xz/.gitignore b/vendor/github.com/ulikunitz/xz/.gitignore new file mode 100644 index 000000000..e3c2fc2f1 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/.gitignore @@ -0,0 +1,25 @@ +# .gitignore + +TODO.html +README.html + +lzma/writer.txt +lzma/reader.txt + +cmd/gxz/gxz +cmd/xb/xb + +# test executables +*.test + +# profile files +*.out + +# vim swap file +.*.swp + +# executables on windows +*.exe + +# default compression test file +enwik8* diff --git a/vendor/github.com/ulikunitz/xz/LICENSE b/vendor/github.com/ulikunitz/xz/LICENSE new file mode 100644 index 000000000..d32149979 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/LICENSE @@ -0,0 +1,26 @@ +Copyright (c) 2014-2020 Ulrich Kunitz +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* My name, Ulrich Kunitz, may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/ulikunitz/xz/README.md b/vendor/github.com/ulikunitz/xz/README.md new file mode 100644 index 000000000..0a2dc8284 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/README.md @@ -0,0 +1,73 @@ +# Package xz + +This Go language package supports the reading and writing of xz +compressed streams. It includes also a gxz command for compressing and +decompressing data. The package is completely written in Go and doesn't +have any dependency on any C code. + +The package is currently under development. There might be bugs and APIs +are not considered stable. At this time the package cannot compete with +the xz tool regarding compression speed and size. The algorithms there +have been developed over a long time and are highly optimized. However +there are a number of improvements planned and I'm very optimistic about +parallel compression and decompression. Stay tuned! + +## Using the API + +The following example program shows how to use the API. + +```go +package main + +import ( + "bytes" + "io" + "log" + "os" + + "github.com/ulikunitz/xz" +) + +func main() { + const text = "The quick brown fox jumps over the lazy dog.\n" + var buf bytes.Buffer + // compress text + w, err := xz.NewWriter(&buf) + if err != nil { + log.Fatalf("xz.NewWriter error %s", err) + } + if _, err := io.WriteString(w, text); err != nil { + log.Fatalf("WriteString error %s", err) + } + if err := w.Close(); err != nil { + log.Fatalf("w.Close error %s", err) + } + // decompress buffer and write output to stdout + r, err := xz.NewReader(&buf) + if err != nil { + log.Fatalf("NewReader error %s", err) + } + if _, err = io.Copy(os.Stdout, r); err != nil { + log.Fatalf("io.Copy error %s", err) + } +} +``` + +## Using the gxz compression tool + +The package includes a gxz command line utility for compression and +decompression. + +Use following command for installation: + + $ go get github.com/ulikunitz/xz/cmd/gxz + +To test it call the following command. + + $ gxz bigfile + +After some time a much smaller file bigfile.xz will replace bigfile. +To decompress it use the following command. + + $ gxz -d bigfile.xz + diff --git a/vendor/github.com/ulikunitz/xz/TODO.md b/vendor/github.com/ulikunitz/xz/TODO.md new file mode 100644 index 000000000..a4224ce14 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/TODO.md @@ -0,0 +1,332 @@ +# TODO list + +## Release v0.5.x + +1. Support check flag in gxz command. + +## Release v0.6 + +1. Review encoder and check for lzma improvements under xz. +2. Fix binary tree matcher. +3. Compare compression ratio with xz tool using comparable parameters + and optimize parameters +4. Do some optimizations + - rename operation action and make it a simple type of size 8 + - make maxMatches, wordSize parameters + - stop searching after a certain length is found (parameter sweetLen) + +## Release v0.7 + +1. Optimize code +2. Do statistical analysis to get linear presets. +3. Test sync.Pool compatability for xz and lzma Writer and Reader +3. Fuzz optimized code. + +## Release v0.8 + +1. Support parallel go routines for writing and reading xz files. +2. Support a ReaderAt interface for xz files with small block sizes. +3. Improve compatibility between gxz and xz +4. Provide manual page for gxz + +## Release v0.9 + +1. Improve documentation +2. Fuzz again + +## Release v1.0 + +1. Full functioning gxz +2. Add godoc URL to README.md (godoc.org) +3. Resolve all issues. +4. Define release candidates. +5. Public announcement. + +## Package lzma + +### Release v0.6 + +- Rewrite Encoder into a simple greedy one-op-at-a-time encoder + including + + simple scan at the dictionary head for the same byte + + use the killer byte (requiring matches to get longer, the first + test should be the byte that would make the match longer) + + +## Optimizations + +- There may be a lot of false sharing in lzma.State; check whether this + can be improved by reorganizing the internal structure of it. +- Check whether batching encoding and decoding improves speed. + +### DAG optimizations + +- Use full buffer to create minimal bit-length above range encoder. +- Might be too slow (see v0.4) + +### Different match finders + +- hashes with 2, 3 characters additional to 4 characters +- binary trees with 2-7 characters (uint64 as key, use uint32 as + pointers into a an array) +- rb-trees with 2-7 characters (uint64 as key, use uint32 as pointers + into an array with bit-steeling for the colors) + +## Release Procedure + +- execute goch -l for all packages; probably with lower param like 0.5. +- check orthography with gospell +- Write release notes in doc/relnotes. +- Update README.md +- xb copyright . in xz directory to ensure all new files have Copyright + header +- VERSION= go generate github.com/ulikunitz/xz/... to update + version files +- Execute test for Linux/amd64, Linux/x86 and Windows/amd64. +- Update TODO.md - write short log entry +- git checkout master && git merge dev +- git tag -a +- git push + +## Log + +### 2020-02-24 + +Release v0.5.7 supports the check-ID None and fixes +[issue #27](https://github.com/ulikunitz/xz/issues/27). + +### 2019-02-20 + +Release v0.5.6 supports the go.mod file. + +### 2018-10-28 + +Release v0.5.5 fixes issues #19 observing ErrLimit outputs. + +### 2017-06-05 + +Release v0.5.4 fixes issues #15 of another problem with the padding size +check for the xz block header. I removed the check completely. + +### 2017-02-15 + +Release v0.5.3 fixes issue #12 regarding the decompression of an empty +XZ stream. Many thanks to Tomasz Kłak, who reported the issue. + +### 2016-12-02 + +Release v0.5.2 became necessary to allow the decoding of xz files with +4-byte padding in the block header. Many thanks to Greg, who reported +the issue. + +### 2016-07-23 + +Release v0.5.1 became necessary to fix problems with 32-bit platforms. +Many thanks to Bruno Brigas, who reported the issue. + +### 2016-07-04 + +Release v0.5 provides improvements to the compressor and provides support for +the decompression of xz files with multiple xz streams. + +### 2016-01-31 + +Another compression rate increase by checking the byte at length of the +best match first, before checking the whole prefix. This makes the +compressor even faster. We have now a large time budget to beat the +compression ratio of the xz tool. For enwik8 we have now over 40 seconds +to reduce the compressed file size for another 7 MiB. + +### 2016-01-30 + +I simplified the encoder. Speed and compression rate increased +dramatically. A high compression rate affects also the decompression +speed. The approach with the buffer and optimizing for operation +compression rate has not been successful. Going for the maximum length +appears to be the best approach. + +### 2016-01-28 + +The release v0.4 is ready. It provides a working xz implementation, +which is rather slow, but works and is interoperable with the xz tool. +It is an important milestone. + +### 2016-01-10 + +I have the first working implementation of an xz reader and writer. I'm +happy about reaching this milestone. + +### 2015-12-02 + +I'm now ready to implement xz because, I have a working LZMA2 +implementation. I decided today that v0.4 will use the slow encoder +using the operations buffer to be able to go back, if I intend to do so. + +### 2015-10-21 + +I have restarted the work on the library. While trying to implement +LZMA2, I discovered that I need to resimplify the encoder and decoder +functions. The option approach is too complicated. Using a limited byte +writer and not caring for written bytes at all and not to try to handle +uncompressed data simplifies the LZMA encoder and decoder much. +Processing uncompressed data and handling limits is a feature of the +LZMA2 format not of LZMA. + +I learned an interesting method from the LZO format. If the last copy is +too far away they are moving the head one 2 bytes and not 1 byte to +reduce processing times. + +### 2015-08-26 + +I have now reimplemented the lzma package. The code is reasonably fast, +but can still be optimized. The next step is to implement LZMA2 and then +xz. + +### 2015-07-05 + +Created release v0.3. The version is the foundation for a full xz +implementation that is the target of v0.4. + +### 2015-06-11 + +The gflag package has been developed because I couldn't use flag and +pflag for a fully compatible support of gzip's and lzma's options. It +seems to work now quite nicely. + +### 2015-06-05 + +The overflow issue was interesting to research, however Henry S. Warren +Jr. Hacker's Delight book was very helpful as usual and had the issue +explained perfectly. Fefe's information on his website was based on the +C FAQ and quite bad, because it didn't address the issue of -MININT == +MININT. + +### 2015-06-04 + +It has been a productive day. I improved the interface of lzma.Reader +and lzma.Writer and fixed the error handling. + +### 2015-06-01 + +By computing the bit length of the LZMA operations I was able to +improve the greedy algorithm implementation. By using an 8 MByte buffer +the compression rate was not as good as for xz but already better then +gzip default. + +Compression is currently slow, but this is something we will be able to +improve over time. + +### 2015-05-26 + +Checked the license of ogier/pflag. The binary lzmago binary should +include the license terms for the pflag library. + +I added the endorsement clause as used by Google for the Go sources the +LICENSE file. + +### 2015-05-22 + +The package lzb contains now the basic implementation for creating or +reading LZMA byte streams. It allows the support for the implementation +of the DAG-shortest-path algorithm for the compression function. + +### 2015-04-23 + +Completed yesterday the lzbase classes. I'm a little bit concerned that +using the components may require too much code, but on the other hand +there is a lot of flexibility. + +### 2015-04-22 + +Implemented Reader and Writer during the Bayern game against Porto. The +second half gave me enough time. + +### 2015-04-21 + +While showering today morning I discovered that the design for OpEncoder +and OpDecoder doesn't work, because encoding/decoding might depend on +the current status of the dictionary. This is not exactly the right way +to start the day. + +Therefore we need to keep the Reader and Writer design. This time around +we simplify it by ignoring size limits. These can be added by wrappers +around the Reader and Writer interfaces. The Parameters type isn't +needed anymore. + +However I will implement a ReaderState and WriterState type to use +static typing to ensure the right State object is combined with the +right lzbase.Reader and lzbase.Writer. + +As a start I have implemented ReaderState and WriterState to ensure +that the state for reading is only used by readers and WriterState only +used by Writers. + +### 2015-04-20 + +Today I implemented the OpDecoder and tested OpEncoder and OpDecoder. + +### 2015-04-08 + +Came up with a new simplified design for lzbase. I implemented already +the type State that replaces OpCodec. + +### 2015-04-06 + +The new lzma package is now fully usable and lzmago is using it now. The +old lzma package has been completely removed. + +### 2015-04-05 + +Implemented lzma.Reader and tested it. + +### 2015-04-04 + +Implemented baseReader by adapting code form lzma.Reader. + +### 2015-04-03 + +The opCodec has been copied yesterday to lzma2. opCodec has a high +number of dependencies on other files in lzma2. Therefore I had to copy +almost all files from lzma. + +### 2015-03-31 + +Removed only a TODO item. + +However in Francesco Campoy's presentation "Go for Javaneros +(Javaïstes?)" is the the idea that using an embedded field E, all the +methods of E will be defined on T. If E is an interface T satisfies E. + +https://talks.golang.org/2014/go4java.slide#51 + +I have never used this, but it seems to be a cool idea. + +### 2015-03-30 + +Finished the type writerDict and wrote a simple test. + +### 2015-03-25 + +I started to implement the writerDict. + +### 2015-03-24 + +After thinking long about the LZMA2 code and several false starts, I +have now a plan to create a self-sufficient lzma2 package that supports +the classic LZMA format as well as LZMA2. The core idea is to support a +baseReader and baseWriter type that support the basic LZMA stream +without any headers. Both types must support the reuse of dictionaries +and the opCodec. + +### 2015-01-10 + +1. Implemented simple lzmago tool +2. Tested tool against large 4.4G file + - compression worked correctly; tested decompression with lzma + - decompression hits a full buffer condition +3. Fixed a bug in the compressor and wrote a test for it +4. Executed full cycle for 4.4 GB file; performance can be improved ;-) + +### 2015-01-11 + +- Release v0.2 because of the working LZMA encoder and decoder diff --git a/vendor/github.com/ulikunitz/xz/bits.go b/vendor/github.com/ulikunitz/xz/bits.go new file mode 100644 index 000000000..364213dd9 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/bits.go @@ -0,0 +1,74 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xz + +import ( + "errors" + "io" +) + +// putUint32LE puts the little-endian representation of x into the first +// four bytes of p. +func putUint32LE(p []byte, x uint32) { + p[0] = byte(x) + p[1] = byte(x >> 8) + p[2] = byte(x >> 16) + p[3] = byte(x >> 24) +} + +// putUint64LE puts the little-endian representation of x into the first +// eight bytes of p. +func putUint64LE(p []byte, x uint64) { + p[0] = byte(x) + p[1] = byte(x >> 8) + p[2] = byte(x >> 16) + p[3] = byte(x >> 24) + p[4] = byte(x >> 32) + p[5] = byte(x >> 40) + p[6] = byte(x >> 48) + p[7] = byte(x >> 56) +} + +// uint32LE converts a little endian representation to an uint32 value. +func uint32LE(p []byte) uint32 { + return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | + uint32(p[3])<<24 +} + +// putUvarint puts a uvarint representation of x into the byte slice. +func putUvarint(p []byte, x uint64) int { + i := 0 + for x >= 0x80 { + p[i] = byte(x) | 0x80 + x >>= 7 + i++ + } + p[i] = byte(x) + return i + 1 +} + +// errOverflow indicates an overflow of the 64-bit unsigned integer. +var errOverflowU64 = errors.New("xz: uvarint overflows 64-bit unsigned integer") + +// readUvarint reads a uvarint from the given byte reader. +func readUvarint(r io.ByteReader) (x uint64, n int, err error) { + var s uint + i := 0 + for { + b, err := r.ReadByte() + if err != nil { + return x, i, err + } + i++ + if b < 0x80 { + if i > 10 || i == 10 && b > 1 { + return x, i, errOverflowU64 + } + return x | uint64(b)< 0 { + k = 4 - k + } + return k +} + +/*** Header ***/ + +// headerMagic stores the magic bytes for the header +var headerMagic = []byte{0xfd, '7', 'z', 'X', 'Z', 0x00} + +// HeaderLen provides the length of the xz file header. +const HeaderLen = 12 + +// Constants for the checksum methods supported by xz. +const ( + None byte = 0x0 + CRC32 = 0x1 + CRC64 = 0x4 + SHA256 = 0xa +) + +// errInvalidFlags indicates that flags are invalid. +var errInvalidFlags = errors.New("xz: invalid flags") + +// verifyFlags returns the error errInvalidFlags if the value is +// invalid. +func verifyFlags(flags byte) error { + switch flags { + case None, CRC32, CRC64, SHA256: + return nil + default: + return errInvalidFlags + } +} + +// flagstrings maps flag values to strings. +var flagstrings = map[byte]string{ + None: "None", + CRC32: "CRC-32", + CRC64: "CRC-64", + SHA256: "SHA-256", +} + +// flagString returns the string representation for the given flags. +func flagString(flags byte) string { + s, ok := flagstrings[flags] + if !ok { + return "invalid" + } + return s +} + +// newHashFunc returns a function that creates hash instances for the +// hash method encoded in flags. +func newHashFunc(flags byte) (newHash func() hash.Hash, err error) { + switch flags { + case None: + newHash = newNoneHash + case CRC32: + newHash = newCRC32 + case CRC64: + newHash = newCRC64 + case SHA256: + newHash = sha256.New + default: + err = errInvalidFlags + } + return +} + +// header provides the actual content of the xz file header: the flags. +type header struct { + flags byte +} + +// Errors returned by readHeader. +var errHeaderMagic = errors.New("xz: invalid header magic bytes") + +// ValidHeader checks whether data is a correct xz file header. The +// length of data must be HeaderLen. +func ValidHeader(data []byte) bool { + var h header + err := h.UnmarshalBinary(data) + return err == nil +} + +// String returns a string representation of the flags. +func (h header) String() string { + return flagString(h.flags) +} + +// UnmarshalBinary reads header from the provided data slice. +func (h *header) UnmarshalBinary(data []byte) error { + // header length + if len(data) != HeaderLen { + return errors.New("xz: wrong file header length") + } + + // magic header + if !bytes.Equal(headerMagic, data[:6]) { + return errHeaderMagic + } + + // checksum + crc := crc32.NewIEEE() + crc.Write(data[6:8]) + if uint32LE(data[8:]) != crc.Sum32() { + return errors.New("xz: invalid checksum for file header") + } + + // stream flags + if data[6] != 0 { + return errInvalidFlags + } + flags := data[7] + if err := verifyFlags(flags); err != nil { + return err + } + + h.flags = flags + return nil +} + +// MarshalBinary generates the xz file header. +func (h *header) MarshalBinary() (data []byte, err error) { + if err = verifyFlags(h.flags); err != nil { + return nil, err + } + + data = make([]byte, 12) + copy(data, headerMagic) + data[7] = h.flags + + crc := crc32.NewIEEE() + crc.Write(data[6:8]) + putUint32LE(data[8:], crc.Sum32()) + + return data, nil +} + +/*** Footer ***/ + +// footerLen defines the length of the footer. +const footerLen = 12 + +// footerMagic contains the footer magic bytes. +var footerMagic = []byte{'Y', 'Z'} + +// footer represents the content of the xz file footer. +type footer struct { + indexSize int64 + flags byte +} + +// String prints a string representation of the footer structure. +func (f footer) String() string { + return fmt.Sprintf("%s index size %d", flagString(f.flags), f.indexSize) +} + +// Minimum and maximum for the size of the index (backward size). +const ( + minIndexSize = 4 + maxIndexSize = (1 << 32) * 4 +) + +// MarshalBinary converts footer values into an xz file footer. Note +// that the footer value is checked for correctness. +func (f *footer) MarshalBinary() (data []byte, err error) { + if err = verifyFlags(f.flags); err != nil { + return nil, err + } + if !(minIndexSize <= f.indexSize && f.indexSize <= maxIndexSize) { + return nil, errors.New("xz: index size out of range") + } + if f.indexSize%4 != 0 { + return nil, errors.New( + "xz: index size not aligned to four bytes") + } + + data = make([]byte, footerLen) + + // backward size (index size) + s := (f.indexSize / 4) - 1 + putUint32LE(data[4:], uint32(s)) + // flags + data[9] = f.flags + // footer magic + copy(data[10:], footerMagic) + + // CRC-32 + crc := crc32.NewIEEE() + crc.Write(data[4:10]) + putUint32LE(data, crc.Sum32()) + + return data, nil +} + +// UnmarshalBinary sets the footer value by unmarshalling an xz file +// footer. +func (f *footer) UnmarshalBinary(data []byte) error { + if len(data) != footerLen { + return errors.New("xz: wrong footer length") + } + + // magic bytes + if !bytes.Equal(data[10:], footerMagic) { + return errors.New("xz: footer magic invalid") + } + + // CRC-32 + crc := crc32.NewIEEE() + crc.Write(data[4:10]) + if uint32LE(data) != crc.Sum32() { + return errors.New("xz: footer checksum error") + } + + var g footer + // backward size (index size) + g.indexSize = (int64(uint32LE(data[4:])) + 1) * 4 + + // flags + if data[8] != 0 { + return errInvalidFlags + } + g.flags = data[9] + if err := verifyFlags(g.flags); err != nil { + return err + } + + *f = g + return nil +} + +/*** Block Header ***/ + +// blockHeader represents the content of an xz block header. +type blockHeader struct { + compressedSize int64 + uncompressedSize int64 + filters []filter +} + +// String converts the block header into a string. +func (h blockHeader) String() string { + var buf bytes.Buffer + first := true + if h.compressedSize >= 0 { + fmt.Fprintf(&buf, "compressed size %d", h.compressedSize) + first = false + } + if h.uncompressedSize >= 0 { + if !first { + buf.WriteString(" ") + } + fmt.Fprintf(&buf, "uncompressed size %d", h.uncompressedSize) + first = false + } + for _, f := range h.filters { + if !first { + buf.WriteString(" ") + } + fmt.Fprintf(&buf, "filter %s", f) + first = false + } + return buf.String() +} + +// Masks for the block flags. +const ( + filterCountMask = 0x03 + compressedSizePresent = 0x40 + uncompressedSizePresent = 0x80 + reservedBlockFlags = 0x3C +) + +// errIndexIndicator signals that an index indicator (0x00) has been found +// instead of an expected block header indicator. +var errIndexIndicator = errors.New("xz: found index indicator") + +// readBlockHeader reads the block header. +func readBlockHeader(r io.Reader) (h *blockHeader, n int, err error) { + var buf bytes.Buffer + buf.Grow(20) + + // block header size + z, err := io.CopyN(&buf, r, 1) + n = int(z) + if err != nil { + return nil, n, err + } + s := buf.Bytes()[0] + if s == 0 { + return nil, n, errIndexIndicator + } + + // read complete header + headerLen := (int(s) + 1) * 4 + buf.Grow(headerLen - 1) + z, err = io.CopyN(&buf, r, int64(headerLen-1)) + n += int(z) + if err != nil { + return nil, n, err + } + + // unmarshal block header + h = new(blockHeader) + if err = h.UnmarshalBinary(buf.Bytes()); err != nil { + return nil, n, err + } + + return h, n, nil +} + +// readSizeInBlockHeader reads the uncompressed or compressed size +// fields in the block header. The present value informs the function +// whether the respective field is actually present in the header. +func readSizeInBlockHeader(r io.ByteReader, present bool) (n int64, err error) { + if !present { + return -1, nil + } + x, _, err := readUvarint(r) + if err != nil { + return 0, err + } + if x >= 1<<63 { + return 0, errors.New("xz: size overflow in block header") + } + return int64(x), nil +} + +// UnmarshalBinary unmarshals the block header. +func (h *blockHeader) UnmarshalBinary(data []byte) error { + // Check header length + s := data[0] + if data[0] == 0 { + return errIndexIndicator + } + headerLen := (int(s) + 1) * 4 + if len(data) != headerLen { + return fmt.Errorf("xz: data length %d; want %d", len(data), + headerLen) + } + n := headerLen - 4 + + // Check CRC-32 + crc := crc32.NewIEEE() + crc.Write(data[:n]) + if crc.Sum32() != uint32LE(data[n:]) { + return errors.New("xz: checksum error for block header") + } + + // Block header flags + flags := data[1] + if flags&reservedBlockFlags != 0 { + return errors.New("xz: reserved block header flags set") + } + + r := bytes.NewReader(data[2:n]) + + // Compressed size + var err error + h.compressedSize, err = readSizeInBlockHeader( + r, flags&compressedSizePresent != 0) + if err != nil { + return err + } + + // Uncompressed size + h.uncompressedSize, err = readSizeInBlockHeader( + r, flags&uncompressedSizePresent != 0) + if err != nil { + return err + } + + h.filters, err = readFilters(r, int(flags&filterCountMask)+1) + if err != nil { + return err + } + + // Check padding + // Since headerLen is a multiple of 4 we don't need to check + // alignment. + k := r.Len() + // The standard spec says that the padding should have not more + // than 3 bytes. However we found paddings of 4 or 5 in the + // wild. See https://github.com/ulikunitz/xz/pull/11 and + // https://github.com/ulikunitz/xz/issues/15 + // + // The only reasonable approach seems to be to ignore the + // padding size. We still check that all padding bytes are zero. + if !allZeros(data[n-k : n]) { + return errPadding + } + return nil +} + +// MarshalBinary marshals the binary header. +func (h *blockHeader) MarshalBinary() (data []byte, err error) { + if !(minFilters <= len(h.filters) && len(h.filters) <= maxFilters) { + return nil, errors.New("xz: filter count wrong") + } + for i, f := range h.filters { + if i < len(h.filters)-1 { + if f.id() == lzmaFilterID { + return nil, errors.New( + "xz: LZMA2 filter is not the last") + } + } else { + // last filter + if f.id() != lzmaFilterID { + return nil, errors.New("xz: " + + "last filter must be the LZMA2 filter") + } + } + } + + var buf bytes.Buffer + // header size must set at the end + buf.WriteByte(0) + + // flags + flags := byte(len(h.filters) - 1) + if h.compressedSize >= 0 { + flags |= compressedSizePresent + } + if h.uncompressedSize >= 0 { + flags |= uncompressedSizePresent + } + buf.WriteByte(flags) + + p := make([]byte, 10) + if h.compressedSize >= 0 { + k := putUvarint(p, uint64(h.compressedSize)) + buf.Write(p[:k]) + } + if h.uncompressedSize >= 0 { + k := putUvarint(p, uint64(h.uncompressedSize)) + buf.Write(p[:k]) + } + + for _, f := range h.filters { + fp, err := f.MarshalBinary() + if err != nil { + return nil, err + } + buf.Write(fp) + } + + // padding + for i := padLen(int64(buf.Len())); i > 0; i-- { + buf.WriteByte(0) + } + + // crc place holder + buf.Write(p[:4]) + + data = buf.Bytes() + if len(data)%4 != 0 { + panic("data length not aligned") + } + s := len(data)/4 - 1 + if !(1 < s && s <= 255) { + panic("wrong block header size") + } + data[0] = byte(s) + + crc := crc32.NewIEEE() + crc.Write(data[:len(data)-4]) + putUint32LE(data[len(data)-4:], crc.Sum32()) + + return data, nil +} + +// Constants used for marshalling and unmarshalling filters in the xz +// block header. +const ( + minFilters = 1 + maxFilters = 4 + minReservedID = 1 << 62 +) + +// filter represents a filter in the block header. +type filter interface { + id() uint64 + UnmarshalBinary(data []byte) error + MarshalBinary() (data []byte, err error) + reader(r io.Reader, c *ReaderConfig) (fr io.Reader, err error) + writeCloser(w io.WriteCloser, c *WriterConfig) (fw io.WriteCloser, err error) + // filter must be last filter + last() bool +} + +// readFilter reads a block filter from the block header. At this point +// in time only the LZMA2 filter is supported. +func readFilter(r io.Reader) (f filter, err error) { + br := lzma.ByteReader(r) + + // index + id, _, err := readUvarint(br) + if err != nil { + return nil, err + } + + var data []byte + switch id { + case lzmaFilterID: + data = make([]byte, lzmaFilterLen) + data[0] = lzmaFilterID + if _, err = io.ReadFull(r, data[1:]); err != nil { + return nil, err + } + f = new(lzmaFilter) + default: + if id >= minReservedID { + return nil, errors.New( + "xz: reserved filter id in block stream header") + } + return nil, errors.New("xz: invalid filter id") + } + if err = f.UnmarshalBinary(data); err != nil { + return nil, err + } + return f, err +} + +// readFilters reads count filters. At this point in time only the count +// 1 is supported. +func readFilters(r io.Reader, count int) (filters []filter, err error) { + if count != 1 { + return nil, errors.New("xz: unsupported filter count") + } + f, err := readFilter(r) + if err != nil { + return nil, err + } + return []filter{f}, err +} + +// writeFilters writes the filters. +func writeFilters(w io.Writer, filters []filter) (n int, err error) { + for _, f := range filters { + p, err := f.MarshalBinary() + if err != nil { + return n, err + } + k, err := w.Write(p) + n += k + if err != nil { + return n, err + } + } + return n, nil +} + +/*** Index ***/ + +// record describes a block in the xz file index. +type record struct { + unpaddedSize int64 + uncompressedSize int64 +} + +// readRecord reads an index record. +func readRecord(r io.ByteReader) (rec record, n int, err error) { + u, k, err := readUvarint(r) + n += k + if err != nil { + return rec, n, err + } + rec.unpaddedSize = int64(u) + if rec.unpaddedSize < 0 { + return rec, n, errors.New("xz: unpadded size negative") + } + + u, k, err = readUvarint(r) + n += k + if err != nil { + return rec, n, err + } + rec.uncompressedSize = int64(u) + if rec.uncompressedSize < 0 { + return rec, n, errors.New("xz: uncompressed size negative") + } + + return rec, n, nil +} + +// MarshalBinary converts an index record in its binary encoding. +func (rec *record) MarshalBinary() (data []byte, err error) { + // maximum length of a uvarint is 10 + p := make([]byte, 20) + n := putUvarint(p, uint64(rec.unpaddedSize)) + n += putUvarint(p[n:], uint64(rec.uncompressedSize)) + return p[:n], nil +} + +// writeIndex writes the index, a sequence of records. +func writeIndex(w io.Writer, index []record) (n int64, err error) { + crc := crc32.NewIEEE() + mw := io.MultiWriter(w, crc) + + // index indicator + k, err := mw.Write([]byte{0}) + n += int64(k) + if err != nil { + return n, err + } + + // number of records + p := make([]byte, 10) + k = putUvarint(p, uint64(len(index))) + k, err = mw.Write(p[:k]) + n += int64(k) + if err != nil { + return n, err + } + + // list of records + for _, rec := range index { + p, err := rec.MarshalBinary() + if err != nil { + return n, err + } + k, err = mw.Write(p) + n += int64(k) + if err != nil { + return n, err + } + } + + // index padding + k, err = mw.Write(make([]byte, padLen(int64(n)))) + n += int64(k) + if err != nil { + return n, err + } + + // crc32 checksum + putUint32LE(p, crc.Sum32()) + k, err = w.Write(p[:4]) + n += int64(k) + + return n, err +} + +// readIndexBody reads the index from the reader. It assumes that the +// index indicator has already been read. +func readIndexBody(r io.Reader) (records []record, n int64, err error) { + crc := crc32.NewIEEE() + // index indicator + crc.Write([]byte{0}) + + br := lzma.ByteReader(io.TeeReader(r, crc)) + + // number of records + u, k, err := readUvarint(br) + n += int64(k) + if err != nil { + return nil, n, err + } + recLen := int(u) + if recLen < 0 || uint64(recLen) != u { + return nil, n, errors.New("xz: record number overflow") + } + + // list of records + records = make([]record, recLen) + for i := range records { + records[i], k, err = readRecord(br) + n += int64(k) + if err != nil { + return nil, n, err + } + } + + p := make([]byte, padLen(int64(n+1)), 4) + k, err = io.ReadFull(br.(io.Reader), p) + n += int64(k) + if err != nil { + return nil, n, err + } + if !allZeros(p) { + return nil, n, errors.New("xz: non-zero byte in index padding") + } + + // crc32 + s := crc.Sum32() + p = p[:4] + k, err = io.ReadFull(br.(io.Reader), p) + n += int64(k) + if err != nil { + return records, n, err + } + if uint32LE(p) != s { + return nil, n, errors.New("xz: wrong checksum for index") + } + + return records, n, nil +} diff --git a/vendor/github.com/ulikunitz/xz/fox-check-none.xz b/vendor/github.com/ulikunitz/xz/fox-check-none.xz new file mode 100644 index 000000000..46043f7dc Binary files /dev/null and b/vendor/github.com/ulikunitz/xz/fox-check-none.xz differ diff --git a/vendor/github.com/ulikunitz/xz/fox.xz b/vendor/github.com/ulikunitz/xz/fox.xz new file mode 100644 index 000000000..4b820bd5a Binary files /dev/null and b/vendor/github.com/ulikunitz/xz/fox.xz differ diff --git a/vendor/github.com/ulikunitz/xz/go.mod b/vendor/github.com/ulikunitz/xz/go.mod new file mode 100644 index 000000000..330b675bd --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/go.mod @@ -0,0 +1,3 @@ +module github.com/ulikunitz/xz + +go 1.12 diff --git a/vendor/github.com/ulikunitz/xz/internal/hash/cyclic_poly.go b/vendor/github.com/ulikunitz/xz/internal/hash/cyclic_poly.go new file mode 100644 index 000000000..f2861ba3f --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/internal/hash/cyclic_poly.go @@ -0,0 +1,181 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package hash + +// CyclicPoly provides a cyclic polynomial rolling hash. +type CyclicPoly struct { + h uint64 + p []uint64 + i int +} + +// ror rotates the unsigned 64-bit integer to right. The argument s must be +// less than 64. +func ror(x uint64, s uint) uint64 { + return (x >> s) | (x << (64 - s)) +} + +// NewCyclicPoly creates a new instance of the CyclicPoly structure. The +// argument n gives the number of bytes for which a hash will be executed. +// This number must be positive; the method panics if this isn't the case. +func NewCyclicPoly(n int) *CyclicPoly { + if n < 1 { + panic("argument n must be positive") + } + return &CyclicPoly{p: make([]uint64, 0, n)} +} + +// Len returns the length of the byte sequence for which a hash is generated. +func (r *CyclicPoly) Len() int { + return cap(r.p) +} + +// RollByte hashes the next byte and returns a hash value. The complete becomes +// available after at least Len() bytes have been hashed. +func (r *CyclicPoly) RollByte(x byte) uint64 { + y := hash[x] + if len(r.p) < cap(r.p) { + r.h = ror(r.h, 1) ^ y + r.p = append(r.p, y) + } else { + r.h ^= ror(r.p[r.i], uint(cap(r.p)-1)) + r.h = ror(r.h, 1) ^ y + r.p[r.i] = y + r.i = (r.i + 1) % cap(r.p) + } + return r.h +} + +// Stores the hash for the individual bytes. +var hash = [256]uint64{ + 0x2e4fc3f904065142, 0xc790984cfbc99527, + 0x879f95eb8c62f187, 0x3b61be86b5021ef2, + 0x65a896a04196f0a5, 0xc5b307b80470b59e, + 0xd3bff376a70df14b, 0xc332f04f0b3f1701, + 0x753b5f0e9abf3e0d, 0xb41538fdfe66ef53, + 0x1906a10c2c1c0208, 0xfb0c712a03421c0d, + 0x38be311a65c9552b, 0xfee7ee4ca6445c7e, + 0x71aadeded184f21e, 0xd73426fccda23b2d, + 0x29773fb5fb9600b5, 0xce410261cd32981a, + 0xfe2848b3c62dbc2d, 0x459eaaff6e43e11c, + 0xc13e35fc9c73a887, 0xf30ed5c201e76dbc, + 0xa5f10b3910482cea, 0x2945d59be02dfaad, + 0x06ee334ff70571b5, 0xbabf9d8070f44380, + 0xee3e2e9912ffd27c, 0x2a7118d1ea6b8ea7, + 0x26183cb9f7b1664c, 0xea71dac7da068f21, + 0xea92eca5bd1d0bb7, 0x415595862defcd75, + 0x248a386023c60648, 0x9cf021ab284b3c8a, + 0xfc9372df02870f6c, 0x2b92d693eeb3b3fc, + 0x73e799d139dc6975, 0x7b15ae312486363c, + 0xb70e5454a2239c80, 0x208e3fb31d3b2263, + 0x01f563cabb930f44, 0x2ac4533d2a3240d8, + 0x84231ed1064f6f7c, 0xa9f020977c2a6d19, + 0x213c227271c20122, 0x09fe8a9a0a03d07a, + 0x4236dc75bcaf910c, 0x460a8b2bead8f17e, + 0xd9b27be1aa07055f, 0xd202d5dc4b11c33e, + 0x70adb010543bea12, 0xcdae938f7ea6f579, + 0x3f3d870208672f4d, 0x8e6ccbce9d349536, + 0xe4c0871a389095ae, 0xf5f2a49152bca080, + 0x9a43f9b97269934e, 0xc17b3753cb6f475c, + 0xd56d941e8e206bd4, 0xac0a4f3e525eda00, + 0xa06d5a011912a550, 0x5537ed19537ad1df, + 0xa32fe713d611449d, 0x2a1d05b47c3b579f, + 0x991d02dbd30a2a52, 0x39e91e7e28f93eb0, + 0x40d06adb3e92c9ac, 0x9b9d3afde1c77c97, + 0x9a3f3f41c02c616f, 0x22ecd4ba00f60c44, + 0x0b63d5d801708420, 0x8f227ca8f37ffaec, + 0x0256278670887c24, 0x107e14877dbf540b, + 0x32c19f2786ac1c05, 0x1df5b12bb4bc9c61, + 0xc0cac129d0d4c4e2, 0x9fdb52ee9800b001, + 0x31f601d5d31c48c4, 0x72ff3c0928bcaec7, + 0xd99264421147eb03, 0x535a2d6d38aefcfe, + 0x6ba8b4454a916237, 0xfa39366eaae4719c, + 0x10f00fd7bbb24b6f, 0x5bd23185c76c84d4, + 0xb22c3d7e1b00d33f, 0x3efc20aa6bc830a8, + 0xd61c2503fe639144, 0x30ce625441eb92d3, + 0xe5d34cf359e93100, 0xa8e5aa13f2b9f7a5, + 0x5c2b8d851ca254a6, 0x68fb6c5e8b0d5fdf, + 0xc7ea4872c96b83ae, 0x6dd5d376f4392382, + 0x1be88681aaa9792f, 0xfef465ee1b6c10d9, + 0x1f98b65ed43fcb2e, 0x4d1ca11eb6e9a9c9, + 0x7808e902b3857d0b, 0x171c9c4ea4607972, + 0x58d66274850146df, 0x42b311c10d3981d1, + 0x647fa8c621c41a4c, 0xf472771c66ddfedc, + 0x338d27e3f847b46b, 0x6402ce3da97545ce, + 0x5162db616fc38638, 0x9c83be97bc22a50e, + 0x2d3d7478a78d5e72, 0xe621a9b938fd5397, + 0x9454614eb0f81c45, 0x395fb6e742ed39b6, + 0x77dd9179d06037bf, 0xc478d0fee4d2656d, + 0x35d9d6cb772007af, 0x83a56e92c883f0f6, + 0x27937453250c00a1, 0x27bd6ebc3a46a97d, + 0x9f543bf784342d51, 0xd158f38c48b0ed52, + 0x8dd8537c045f66b4, 0x846a57230226f6d5, + 0x6b13939e0c4e7cdf, 0xfca25425d8176758, + 0x92e5fc6cd52788e6, 0x9992e13d7a739170, + 0x518246f7a199e8ea, 0xf104c2a71b9979c7, + 0x86b3ffaabea4768f, 0x6388061cf3e351ad, + 0x09d9b5295de5bbb5, 0x38bf1638c2599e92, + 0x1d759846499e148d, 0x4c0ff015e5f96ef4, + 0xa41a94cfa270f565, 0x42d76f9cb2326c0b, + 0x0cf385dd3c9c23ba, 0x0508a6c7508d6e7a, + 0x337523aabbe6cf8d, 0x646bb14001d42b12, + 0xc178729d138adc74, 0xf900ef4491f24086, + 0xee1a90d334bb5ac4, 0x9755c92247301a50, + 0xb999bf7c4ff1b610, 0x6aeeb2f3b21e8fc9, + 0x0fa8084cf91ac6ff, 0x10d226cf136e6189, + 0xd302057a07d4fb21, 0x5f03800e20a0fcc3, + 0x80118d4ae46bd210, 0x58ab61a522843733, + 0x51edd575c5432a4b, 0x94ee6ff67f9197f7, + 0x765669e0e5e8157b, 0xa5347830737132f0, + 0x3ba485a69f01510c, 0x0b247d7b957a01c3, + 0x1b3d63449fd807dc, 0x0fdc4721c30ad743, + 0x8b535ed3829b2b14, 0xee41d0cad65d232c, + 0xe6a99ed97a6a982f, 0x65ac6194c202003d, + 0x692accf3a70573eb, 0xcc3c02c3e200d5af, + 0x0d419e8b325914a3, 0x320f160f42c25e40, + 0x00710d647a51fe7a, 0x3c947692330aed60, + 0x9288aa280d355a7a, 0xa1806a9b791d1696, + 0x5d60e38496763da1, 0x6c69e22e613fd0f4, + 0x977fc2a5aadffb17, 0xfb7bd063fc5a94ba, + 0x460c17992cbaece1, 0xf7822c5444d3297f, + 0x344a9790c69b74aa, 0xb80a42e6cae09dce, + 0x1b1361eaf2b1e757, 0xd84c1e758e236f01, + 0x88e0b7be347627cc, 0x45246009b7a99490, + 0x8011c6dd3fe50472, 0xc341d682bffb99d7, + 0x2511be93808e2d15, 0xd5bc13d7fd739840, + 0x2a3cd030679ae1ec, 0x8ad9898a4b9ee157, + 0x3245fef0a8eaf521, 0x3d6d8dbbb427d2b0, + 0x1ed146d8968b3981, 0x0c6a28bf7d45f3fc, + 0x4a1fd3dbcee3c561, 0x4210ff6a476bf67e, + 0xa559cce0d9199aac, 0xde39d47ef3723380, + 0xe5b69d848ce42e35, 0xefa24296f8e79f52, + 0x70190b59db9a5afc, 0x26f166cdb211e7bf, + 0x4deaf2df3c6b8ef5, 0xf171dbdd670f1017, + 0xb9059b05e9420d90, 0x2f0da855c9388754, + 0x611d5e9ab77949cc, 0x2912038ac01163f4, + 0x0231df50402b2fba, 0x45660fc4f3245f58, + 0xb91cc97c7c8dac50, 0xb72d2aafe4953427, + 0xfa6463f87e813d6b, 0x4515f7ee95d5c6a2, + 0x1310e1c1a48d21c3, 0xad48a7810cdd8544, + 0x4d5bdfefd5c9e631, 0xa43ed43f1fdcb7de, + 0xe70cfc8fe1ee9626, 0xef4711b0d8dda442, + 0xb80dd9bd4dab6c93, 0xa23be08d31ba4d93, + 0x9b37db9d0335a39c, 0x494b6f870f5cfebc, + 0x6d1b3c1149dda943, 0x372c943a518c1093, + 0xad27af45e77c09c4, 0x3b6f92b646044604, + 0xac2917909f5fcf4f, 0x2069a60e977e5557, + 0x353a469e71014de5, 0x24be356281f55c15, + 0x2b6d710ba8e9adea, 0x404ad1751c749c29, + 0xed7311bf23d7f185, 0xba4f6976b4acc43e, + 0x32d7198d2bc39000, 0xee667019014d6e01, + 0x494ef3e128d14c83, 0x1f95a152baecd6be, + 0x201648dff1f483a5, 0x68c28550c8384af6, + 0x5fc834a6824a7f48, 0x7cd06cb7365eaf28, + 0xd82bbd95e9b30909, 0x234f0d1694c53f6d, + 0xd2fb7f4a96d83f4a, 0xff0d5da83acac05e, + 0xf8f6b97f5585080a, 0x74236084be57b95b, + 0xa25e40c03bbc36ad, 0x6b6e5c14ce88465b, + 0x4378ffe93e1528c5, 0x94ca92a17118e2d2, +} diff --git a/vendor/github.com/ulikunitz/xz/internal/hash/doc.go b/vendor/github.com/ulikunitz/xz/internal/hash/doc.go new file mode 100644 index 000000000..e28d23be4 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/internal/hash/doc.go @@ -0,0 +1,14 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package hash provides rolling hashes. + +Rolling hashes have to be used for maintaining the positions of n-byte +sequences in the dictionary buffer. + +The package provides currently the Rabin-Karp rolling hash and a Cyclic +Polynomial hash. Both support the Hashes method to be used with an interface. +*/ +package hash diff --git a/vendor/github.com/ulikunitz/xz/internal/hash/rabin_karp.go b/vendor/github.com/ulikunitz/xz/internal/hash/rabin_karp.go new file mode 100644 index 000000000..b8e66d972 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/internal/hash/rabin_karp.go @@ -0,0 +1,66 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package hash + +// A is the default constant for Robin-Karp rolling hash. This is a random +// prime. +const A = 0x97b548add41d5da1 + +// RabinKarp supports the computation of a rolling hash. +type RabinKarp struct { + A uint64 + // a^n + aOldest uint64 + h uint64 + p []byte + i int +} + +// NewRabinKarp creates a new RabinKarp value. The argument n defines the +// length of the byte sequence to be hashed. The default constant will will be +// used. +func NewRabinKarp(n int) *RabinKarp { + return NewRabinKarpConst(n, A) +} + +// NewRabinKarpConst creates a new RabinKarp value. The argument n defines the +// length of the byte sequence to be hashed. The argument a provides the +// constant used to compute the hash. +func NewRabinKarpConst(n int, a uint64) *RabinKarp { + if n <= 0 { + panic("number of bytes n must be positive") + } + aOldest := uint64(1) + // There are faster methods. For the small n required by the LZMA + // compressor O(n) is sufficient. + for i := 0; i < n; i++ { + aOldest *= a + } + return &RabinKarp{ + A: a, aOldest: aOldest, + p: make([]byte, 0, n), + } +} + +// Len returns the length of the byte sequence. +func (r *RabinKarp) Len() int { + return cap(r.p) +} + +// RollByte computes the hash after x has been added. +func (r *RabinKarp) RollByte(x byte) uint64 { + if len(r.p) < cap(r.p) { + r.h += uint64(x) + r.h *= r.A + r.p = append(r.p, x) + } else { + r.h -= uint64(r.p[r.i]) * r.aOldest + r.h += uint64(x) + r.h *= r.A + r.p[r.i] = x + r.i = (r.i + 1) % cap(r.p) + } + return r.h +} diff --git a/vendor/github.com/ulikunitz/xz/internal/hash/roller.go b/vendor/github.com/ulikunitz/xz/internal/hash/roller.go new file mode 100644 index 000000000..34c81b38a --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/internal/hash/roller.go @@ -0,0 +1,29 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package hash + +// Roller provides an interface for rolling hashes. The hash value will become +// valid after hash has been called Len times. +type Roller interface { + Len() int + RollByte(x byte) uint64 +} + +// Hashes computes all hash values for the array p. Note that the state of the +// roller is changed. +func Hashes(r Roller, p []byte) []uint64 { + n := r.Len() + if len(p) < n { + return nil + } + h := make([]uint64, len(p)-n+1) + for i := 0; i < n-1; i++ { + r.RollByte(p[i]) + } + for i := range h { + h[i] = r.RollByte(p[i+n-1]) + } + return h +} diff --git a/vendor/github.com/ulikunitz/xz/internal/xlog/xlog.go b/vendor/github.com/ulikunitz/xz/internal/xlog/xlog.go new file mode 100644 index 000000000..678b5a058 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/internal/xlog/xlog.go @@ -0,0 +1,457 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package xlog provides a simple logging package that allows to disable +// certain message categories. It defines a type, Logger, with multiple +// methods for formatting output. The package has also a predefined +// 'standard' Logger accessible through helper function Print[f|ln], +// Fatal[f|ln], Panic[f|ln], Warn[f|ln], Print[f|ln] and Debug[f|ln] +// that are easier to use then creating a Logger manually. That logger +// writes to standard error and prints the date and time of each logged +// message, which can be configured using the function SetFlags. +// +// The Fatal functions call os.Exit(1) after the message is output +// unless not suppressed by the flags. The Panic functions call panic +// after the writing the log message unless suppressed. +package xlog + +import ( + "fmt" + "io" + "os" + "runtime" + "sync" + "time" +) + +// The flags define what information is prefixed to each log entry +// generated by the Logger. The Lno* versions allow the suppression of +// specific output. The bits are or'ed together to control what will be +// printed. There is no control over the order of the items printed and +// the format. The full format is: +// +// 2009-01-23 01:23:23.123123 /a/b/c/d.go:23: message +// +const ( + Ldate = 1 << iota // the date: 2009-01-23 + Ltime // the time: 01:23:23 + Lmicroseconds // microsecond resolution: 01:23:23.123123 + Llongfile // full file name and line number: /a/b/c/d.go:23 + Lshortfile // final file name element and line number: d.go:23 + Lnopanic // suppresses output from Panic[f|ln] but not the panic call + Lnofatal // suppresses output from Fatal[f|ln] but not the exit + Lnowarn // suppresses output from Warn[f|ln] + Lnoprint // suppresses output from Print[f|ln] + Lnodebug // suppresses output from Debug[f|ln] + // initial values for the standard logger + Lstdflags = Ldate | Ltime | Lnodebug +) + +// A Logger represents an active logging object that generates lines of +// output to an io.Writer. Each logging operation if not suppressed +// makes a single call to the Writer's Write method. A Logger can be +// used simultaneously from multiple goroutines; it guarantees to +// serialize access to the Writer. +type Logger struct { + mu sync.Mutex // ensures atomic writes; and protects the following + // fields + prefix string // prefix to write at beginning of each line + flag int // properties + out io.Writer // destination for output + buf []byte // for accumulating text to write +} + +// New creates a new Logger. The out argument sets the destination to +// which the log output will be written. The prefix appears at the +// beginning of each log line. The flag argument defines the logging +// properties. +func New(out io.Writer, prefix string, flag int) *Logger { + return &Logger{out: out, prefix: prefix, flag: flag} +} + +// std is the standard logger used by the package scope functions. +var std = New(os.Stderr, "", Lstdflags) + +// itoa converts the integer to ASCII. A negative widths will avoid +// zero-padding. The function supports only non-negative integers. +func itoa(buf *[]byte, i int, wid int) { + var u = uint(i) + if u == 0 && wid <= 1 { + *buf = append(*buf, '0') + return + } + var b [32]byte + bp := len(b) + for ; u > 0 || wid > 0; u /= 10 { + bp-- + wid-- + b[bp] = byte(u%10) + '0' + } + *buf = append(*buf, b[bp:]...) +} + +// formatHeader puts the header into the buf field of the buffer. +func (l *Logger) formatHeader(t time.Time, file string, line int) { + l.buf = append(l.buf, l.prefix...) + if l.flag&(Ldate|Ltime|Lmicroseconds) != 0 { + if l.flag&Ldate != 0 { + year, month, day := t.Date() + itoa(&l.buf, year, 4) + l.buf = append(l.buf, '-') + itoa(&l.buf, int(month), 2) + l.buf = append(l.buf, '-') + itoa(&l.buf, day, 2) + l.buf = append(l.buf, ' ') + } + if l.flag&(Ltime|Lmicroseconds) != 0 { + hour, min, sec := t.Clock() + itoa(&l.buf, hour, 2) + l.buf = append(l.buf, ':') + itoa(&l.buf, min, 2) + l.buf = append(l.buf, ':') + itoa(&l.buf, sec, 2) + if l.flag&Lmicroseconds != 0 { + l.buf = append(l.buf, '.') + itoa(&l.buf, t.Nanosecond()/1e3, 6) + } + l.buf = append(l.buf, ' ') + } + } + if l.flag&(Lshortfile|Llongfile) != 0 { + if l.flag&Lshortfile != 0 { + short := file + for i := len(file) - 1; i > 0; i-- { + if file[i] == '/' { + short = file[i+1:] + break + } + } + file = short + } + l.buf = append(l.buf, file...) + l.buf = append(l.buf, ':') + itoa(&l.buf, line, -1) + l.buf = append(l.buf, ": "...) + } +} + +func (l *Logger) output(calldepth int, now time.Time, s string) error { + var file string + var line int + if l.flag&(Lshortfile|Llongfile) != 0 { + l.mu.Unlock() + var ok bool + _, file, line, ok = runtime.Caller(calldepth) + if !ok { + file = "???" + line = 0 + } + l.mu.Lock() + } + l.buf = l.buf[:0] + l.formatHeader(now, file, line) + l.buf = append(l.buf, s...) + if len(s) == 0 || s[len(s)-1] != '\n' { + l.buf = append(l.buf, '\n') + } + _, err := l.out.Write(l.buf) + return err +} + +// Output writes the string s with the header controlled by the flags to +// the l.out writer. A newline will be appended if s doesn't end in a +// newline. Calldepth is used to recover the PC, although all current +// calls of Output use the call depth 2. Access to the function is serialized. +func (l *Logger) Output(calldepth, noflag int, v ...interface{}) error { + now := time.Now() + l.mu.Lock() + defer l.mu.Unlock() + if l.flag&noflag != 0 { + return nil + } + s := fmt.Sprint(v...) + return l.output(calldepth+1, now, s) +} + +// Outputf works like output but formats the output like Printf. +func (l *Logger) Outputf(calldepth int, noflag int, format string, v ...interface{}) error { + now := time.Now() + l.mu.Lock() + defer l.mu.Unlock() + if l.flag&noflag != 0 { + return nil + } + s := fmt.Sprintf(format, v...) + return l.output(calldepth+1, now, s) +} + +// Outputln works like output but formats the output like Println. +func (l *Logger) Outputln(calldepth int, noflag int, v ...interface{}) error { + now := time.Now() + l.mu.Lock() + defer l.mu.Unlock() + if l.flag&noflag != 0 { + return nil + } + s := fmt.Sprintln(v...) + return l.output(calldepth+1, now, s) +} + +// Panic prints the message like Print and calls panic. The printing +// might be suppressed by the flag Lnopanic. +func (l *Logger) Panic(v ...interface{}) { + l.Output(2, Lnopanic, v...) + s := fmt.Sprint(v...) + panic(s) +} + +// Panic prints the message like Print and calls panic. The printing +// might be suppressed by the flag Lnopanic. +func Panic(v ...interface{}) { + std.Output(2, Lnopanic, v...) + s := fmt.Sprint(v...) + panic(s) +} + +// Panicf prints the message like Printf and calls panic. The printing +// might be suppressed by the flag Lnopanic. +func (l *Logger) Panicf(format string, v ...interface{}) { + l.Outputf(2, Lnopanic, format, v...) + s := fmt.Sprintf(format, v...) + panic(s) +} + +// Panicf prints the message like Printf and calls panic. The printing +// might be suppressed by the flag Lnopanic. +func Panicf(format string, v ...interface{}) { + std.Outputf(2, Lnopanic, format, v...) + s := fmt.Sprintf(format, v...) + panic(s) +} + +// Panicln prints the message like Println and calls panic. The printing +// might be suppressed by the flag Lnopanic. +func (l *Logger) Panicln(v ...interface{}) { + l.Outputln(2, Lnopanic, v...) + s := fmt.Sprintln(v...) + panic(s) +} + +// Panicln prints the message like Println and calls panic. The printing +// might be suppressed by the flag Lnopanic. +func Panicln(v ...interface{}) { + std.Outputln(2, Lnopanic, v...) + s := fmt.Sprintln(v...) + panic(s) +} + +// Fatal prints the message like Print and calls os.Exit(1). The +// printing might be suppressed by the flag Lnofatal. +func (l *Logger) Fatal(v ...interface{}) { + l.Output(2, Lnofatal, v...) + os.Exit(1) +} + +// Fatal prints the message like Print and calls os.Exit(1). The +// printing might be suppressed by the flag Lnofatal. +func Fatal(v ...interface{}) { + std.Output(2, Lnofatal, v...) + os.Exit(1) +} + +// Fatalf prints the message like Printf and calls os.Exit(1). The +// printing might be suppressed by the flag Lnofatal. +func (l *Logger) Fatalf(format string, v ...interface{}) { + l.Outputf(2, Lnofatal, format, v...) + os.Exit(1) +} + +// Fatalf prints the message like Printf and calls os.Exit(1). The +// printing might be suppressed by the flag Lnofatal. +func Fatalf(format string, v ...interface{}) { + std.Outputf(2, Lnofatal, format, v...) + os.Exit(1) +} + +// Fatalln prints the message like Println and calls os.Exit(1). The +// printing might be suppressed by the flag Lnofatal. +func (l *Logger) Fatalln(format string, v ...interface{}) { + l.Outputln(2, Lnofatal, v...) + os.Exit(1) +} + +// Fatalln prints the message like Println and calls os.Exit(1). The +// printing might be suppressed by the flag Lnofatal. +func Fatalln(format string, v ...interface{}) { + std.Outputln(2, Lnofatal, v...) + os.Exit(1) +} + +// Warn prints the message like Print. The printing might be suppressed +// by the flag Lnowarn. +func (l *Logger) Warn(v ...interface{}) { + l.Output(2, Lnowarn, v...) +} + +// Warn prints the message like Print. The printing might be suppressed +// by the flag Lnowarn. +func Warn(v ...interface{}) { + std.Output(2, Lnowarn, v...) +} + +// Warnf prints the message like Printf. The printing might be suppressed +// by the flag Lnowarn. +func (l *Logger) Warnf(format string, v ...interface{}) { + l.Outputf(2, Lnowarn, format, v...) +} + +// Warnf prints the message like Printf. The printing might be suppressed +// by the flag Lnowarn. +func Warnf(format string, v ...interface{}) { + std.Outputf(2, Lnowarn, format, v...) +} + +// Warnln prints the message like Println. The printing might be suppressed +// by the flag Lnowarn. +func (l *Logger) Warnln(v ...interface{}) { + l.Outputln(2, Lnowarn, v...) +} + +// Warnln prints the message like Println. The printing might be suppressed +// by the flag Lnowarn. +func Warnln(v ...interface{}) { + std.Outputln(2, Lnowarn, v...) +} + +// Print prints the message like fmt.Print. The printing might be suppressed +// by the flag Lnoprint. +func (l *Logger) Print(v ...interface{}) { + l.Output(2, Lnoprint, v...) +} + +// Print prints the message like fmt.Print. The printing might be suppressed +// by the flag Lnoprint. +func Print(v ...interface{}) { + std.Output(2, Lnoprint, v...) +} + +// Printf prints the message like fmt.Printf. The printing might be suppressed +// by the flag Lnoprint. +func (l *Logger) Printf(format string, v ...interface{}) { + l.Outputf(2, Lnoprint, format, v...) +} + +// Printf prints the message like fmt.Printf. The printing might be suppressed +// by the flag Lnoprint. +func Printf(format string, v ...interface{}) { + std.Outputf(2, Lnoprint, format, v...) +} + +// Println prints the message like fmt.Println. The printing might be +// suppressed by the flag Lnoprint. +func (l *Logger) Println(v ...interface{}) { + l.Outputln(2, Lnoprint, v...) +} + +// Println prints the message like fmt.Println. The printing might be +// suppressed by the flag Lnoprint. +func Println(v ...interface{}) { + std.Outputln(2, Lnoprint, v...) +} + +// Debug prints the message like Print. The printing might be suppressed +// by the flag Lnodebug. +func (l *Logger) Debug(v ...interface{}) { + l.Output(2, Lnodebug, v...) +} + +// Debug prints the message like Print. The printing might be suppressed +// by the flag Lnodebug. +func Debug(v ...interface{}) { + std.Output(2, Lnodebug, v...) +} + +// Debugf prints the message like Printf. The printing might be suppressed +// by the flag Lnodebug. +func (l *Logger) Debugf(format string, v ...interface{}) { + l.Outputf(2, Lnodebug, format, v...) +} + +// Debugf prints the message like Printf. The printing might be suppressed +// by the flag Lnodebug. +func Debugf(format string, v ...interface{}) { + std.Outputf(2, Lnodebug, format, v...) +} + +// Debugln prints the message like Println. The printing might be suppressed +// by the flag Lnodebug. +func (l *Logger) Debugln(v ...interface{}) { + l.Outputln(2, Lnodebug, v...) +} + +// Debugln prints the message like Println. The printing might be suppressed +// by the flag Lnodebug. +func Debugln(v ...interface{}) { + std.Outputln(2, Lnodebug, v...) +} + +// Flags returns the current flags used by the logger. +func (l *Logger) Flags() int { + l.mu.Lock() + defer l.mu.Unlock() + return l.flag +} + +// Flags returns the current flags used by the standard logger. +func Flags() int { + return std.Flags() +} + +// SetFlags sets the flags of the logger. +func (l *Logger) SetFlags(flag int) { + l.mu.Lock() + defer l.mu.Unlock() + l.flag = flag +} + +// SetFlags sets the flags for the standard logger. +func SetFlags(flag int) { + std.SetFlags(flag) +} + +// Prefix returns the prefix used by the logger. +func (l *Logger) Prefix() string { + l.mu.Lock() + defer l.mu.Unlock() + return l.prefix +} + +// Prefix returns the prefix used by the standard logger of the package. +func Prefix() string { + return std.Prefix() +} + +// SetPrefix sets the prefix for the logger. +func (l *Logger) SetPrefix(prefix string) { + l.mu.Lock() + defer l.mu.Unlock() + l.prefix = prefix +} + +// SetPrefix sets the prefix of the standard logger of the package. +func SetPrefix(prefix string) { + std.SetPrefix(prefix) +} + +// SetOutput sets the output of the logger. +func (l *Logger) SetOutput(w io.Writer) { + l.mu.Lock() + defer l.mu.Unlock() + l.out = w +} + +// SetOutput sets the output for the standard logger of the package. +func SetOutput(w io.Writer) { + std.SetOutput(w) +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/bintree.go b/vendor/github.com/ulikunitz/xz/lzma/bintree.go new file mode 100644 index 000000000..58d6a92a7 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/bintree.go @@ -0,0 +1,523 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "bufio" + "errors" + "fmt" + "io" + "unicode" +) + +// node represents a node in the binary tree. +type node struct { + // x is the search value + x uint32 + // p parent node + p uint32 + // l left child + l uint32 + // r right child + r uint32 +} + +// wordLen is the number of bytes represented by the v field of a node. +const wordLen = 4 + +// binTree supports the identification of the next operation based on a +// binary tree. +// +// Nodes will be identified by their index into the ring buffer. +type binTree struct { + dict *encoderDict + // ring buffer of nodes + node []node + // absolute offset of the entry for the next node. Position 4 + // byte larger. + hoff int64 + // front position in the node ring buffer + front uint32 + // index of the root node + root uint32 + // current x value + x uint32 + // preallocated array + data []byte +} + +// null represents the nonexistent index. We can't use zero because it +// would always exist or we would need to decrease the index for each +// reference. +const null uint32 = 1<<32 - 1 + +// newBinTree initializes the binTree structure. The capacity defines +// the size of the buffer and defines the maximum distance for which +// matches will be found. +func newBinTree(capacity int) (t *binTree, err error) { + if capacity < 1 { + return nil, errors.New( + "newBinTree: capacity must be larger than zero") + } + if int64(capacity) >= int64(null) { + return nil, errors.New( + "newBinTree: capacity must less 2^{32}-1") + } + t = &binTree{ + node: make([]node, capacity), + hoff: -int64(wordLen), + root: null, + data: make([]byte, maxMatchLen), + } + return t, nil +} + +func (t *binTree) SetDict(d *encoderDict) { t.dict = d } + +// WriteByte writes a single byte into the binary tree. +func (t *binTree) WriteByte(c byte) error { + t.x = (t.x << 8) | uint32(c) + t.hoff++ + if t.hoff < 0 { + return nil + } + v := t.front + if int64(v) < t.hoff { + // We are overwriting old nodes stored in the tree. + t.remove(v) + } + t.node[v].x = t.x + t.add(v) + t.front++ + if int64(t.front) >= int64(len(t.node)) { + t.front = 0 + } + return nil +} + +// Writes writes a sequence of bytes into the binTree structure. +func (t *binTree) Write(p []byte) (n int, err error) { + for _, c := range p { + t.WriteByte(c) + } + return len(p), nil +} + +// add puts the node v into the tree. The node must not be part of the +// tree before. +func (t *binTree) add(v uint32) { + vn := &t.node[v] + // Set left and right to null indices. + vn.l, vn.r = null, null + // If the binary tree is empty make v the root. + if t.root == null { + t.root = v + vn.p = null + return + } + x := vn.x + p := t.root + // Search for the right leave link and add the new node. + for { + pn := &t.node[p] + if x <= pn.x { + if pn.l == null { + pn.l = v + vn.p = p + return + } + p = pn.l + } else { + if pn.r == null { + pn.r = v + vn.p = p + return + } + p = pn.r + } + } +} + +// parent returns the parent node index of v and the pointer to v value +// in the parent. +func (t *binTree) parent(v uint32) (p uint32, ptr *uint32) { + if t.root == v { + return null, &t.root + } + p = t.node[v].p + if t.node[p].l == v { + ptr = &t.node[p].l + } else { + ptr = &t.node[p].r + } + return +} + +// Remove node v. +func (t *binTree) remove(v uint32) { + vn := &t.node[v] + p, ptr := t.parent(v) + l, r := vn.l, vn.r + if l == null { + // Move the right child up. + *ptr = r + if r != null { + t.node[r].p = p + } + return + } + if r == null { + // Move the left child up. + *ptr = l + t.node[l].p = p + return + } + + // Search the in-order predecessor u. + un := &t.node[l] + ur := un.r + if ur == null { + // In order predecessor is l. Move it up. + un.r = r + t.node[r].p = l + un.p = p + *ptr = l + return + } + var u uint32 + for { + // Look for the max value in the tree where l is root. + u = ur + ur = t.node[u].r + if ur == null { + break + } + } + // replace u with ul + un = &t.node[u] + ul := un.l + up := un.p + t.node[up].r = ul + if ul != null { + t.node[ul].p = up + } + + // replace v by u + un.l, un.r = l, r + t.node[l].p = u + t.node[r].p = u + *ptr = u + un.p = p +} + +// search looks for the node that have the value x or for the nodes that +// brace it. The node highest in the tree with the value x will be +// returned. All other nodes with the same value live in left subtree of +// the returned node. +func (t *binTree) search(v uint32, x uint32) (a, b uint32) { + a, b = null, null + if v == null { + return + } + for { + vn := &t.node[v] + if x <= vn.x { + if x == vn.x { + return v, v + } + b = v + if vn.l == null { + return + } + v = vn.l + } else { + a = v + if vn.r == null { + return + } + v = vn.r + } + } +} + +// max returns the node with maximum value in the subtree with v as +// root. +func (t *binTree) max(v uint32) uint32 { + if v == null { + return null + } + for { + r := t.node[v].r + if r == null { + return v + } + v = r + } +} + +// min returns the node with the minimum value in the subtree with v as +// root. +func (t *binTree) min(v uint32) uint32 { + if v == null { + return null + } + for { + l := t.node[v].l + if l == null { + return v + } + v = l + } +} + +// pred returns the in-order predecessor of node v. +func (t *binTree) pred(v uint32) uint32 { + if v == null { + return null + } + u := t.max(t.node[v].l) + if u != null { + return u + } + for { + p := t.node[v].p + if p == null { + return null + } + if t.node[p].r == v { + return p + } + v = p + } +} + +// succ returns the in-order successor of node v. +func (t *binTree) succ(v uint32) uint32 { + if v == null { + return null + } + u := t.min(t.node[v].r) + if u != null { + return u + } + for { + p := t.node[v].p + if p == null { + return null + } + if t.node[p].l == v { + return p + } + v = p + } +} + +// xval converts the first four bytes of a into an 32-bit unsigned +// integer in big-endian order. +func xval(a []byte) uint32 { + var x uint32 + switch len(a) { + default: + x |= uint32(a[3]) + fallthrough + case 3: + x |= uint32(a[2]) << 8 + fallthrough + case 2: + x |= uint32(a[1]) << 16 + fallthrough + case 1: + x |= uint32(a[0]) << 24 + case 0: + } + return x +} + +// dumpX converts value x into a four-letter string. +func dumpX(x uint32) string { + a := make([]byte, 4) + for i := 0; i < 4; i++ { + c := byte(x >> uint((3-i)*8)) + if unicode.IsGraphic(rune(c)) { + a[i] = c + } else { + a[i] = '.' + } + } + return string(a) +} + +// dumpNode writes a representation of the node v into the io.Writer. +func (t *binTree) dumpNode(w io.Writer, v uint32, indent int) { + if v == null { + return + } + + vn := &t.node[v] + + t.dumpNode(w, vn.r, indent+2) + + for i := 0; i < indent; i++ { + fmt.Fprint(w, " ") + } + if vn.p == null { + fmt.Fprintf(w, "node %d %q parent null\n", v, dumpX(vn.x)) + } else { + fmt.Fprintf(w, "node %d %q parent %d\n", v, dumpX(vn.x), vn.p) + } + + t.dumpNode(w, vn.l, indent+2) +} + +// dump prints a representation of the binary tree into the writer. +func (t *binTree) dump(w io.Writer) error { + bw := bufio.NewWriter(w) + t.dumpNode(bw, t.root, 0) + return bw.Flush() +} + +func (t *binTree) distance(v uint32) int { + dist := int(t.front) - int(v) + if dist <= 0 { + dist += len(t.node) + } + return dist +} + +type matchParams struct { + rep [4]uint32 + // length when match will be accepted + nAccept int + // nodes to check + check int + // finish if length get shorter + stopShorter bool +} + +func (t *binTree) match(m match, distIter func() (int, bool), p matchParams, +) (r match, checked int, accepted bool) { + buf := &t.dict.buf + for { + if checked >= p.check { + return m, checked, true + } + dist, ok := distIter() + if !ok { + return m, checked, false + } + checked++ + if m.n > 0 { + i := buf.rear - dist + m.n - 1 + if i < 0 { + i += len(buf.data) + } else if i >= len(buf.data) { + i -= len(buf.data) + } + if buf.data[i] != t.data[m.n-1] { + if p.stopShorter { + return m, checked, false + } + continue + } + } + n := buf.matchLen(dist, t.data) + switch n { + case 0: + if p.stopShorter { + return m, checked, false + } + continue + case 1: + if uint32(dist-minDistance) != p.rep[0] { + continue + } + } + if n < m.n || (n == m.n && int64(dist) >= m.distance) { + continue + } + m = match{int64(dist), n} + if n >= p.nAccept { + return m, checked, true + } + } +} + +func (t *binTree) NextOp(rep [4]uint32) operation { + // retrieve maxMatchLen data + n, _ := t.dict.buf.Peek(t.data[:maxMatchLen]) + if n == 0 { + panic("no data in buffer") + } + t.data = t.data[:n] + + var ( + m match + x, u, v uint32 + iterPred, iterSucc func() (int, bool) + ) + p := matchParams{ + rep: rep, + nAccept: maxMatchLen, + check: 32, + } + i := 4 + iterSmall := func() (dist int, ok bool) { + i-- + if i <= 0 { + return 0, false + } + return i, true + } + m, checked, accepted := t.match(m, iterSmall, p) + if accepted { + goto end + } + p.check -= checked + x = xval(t.data) + u, v = t.search(t.root, x) + if u == v && len(t.data) == 4 { + iter := func() (dist int, ok bool) { + if u == null { + return 0, false + } + dist = t.distance(u) + u, v = t.search(t.node[u].l, x) + if u != v { + u = null + } + return dist, true + } + m, _, _ = t.match(m, iter, p) + goto end + } + p.stopShorter = true + iterSucc = func() (dist int, ok bool) { + if v == null { + return 0, false + } + dist = t.distance(v) + v = t.succ(v) + return dist, true + } + m, checked, accepted = t.match(m, iterSucc, p) + if accepted { + goto end + } + p.check -= checked + iterPred = func() (dist int, ok bool) { + if u == null { + return 0, false + } + dist = t.distance(u) + u = t.pred(u) + return dist, true + } + m, _, _ = t.match(m, iterPred, p) +end: + if m.n == 0 { + return lit{t.data[0]} + } + return m +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/bitops.go b/vendor/github.com/ulikunitz/xz/lzma/bitops.go new file mode 100644 index 000000000..2784ec6ba --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/bitops.go @@ -0,0 +1,45 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +/* Naming conventions follows the CodeReviewComments in the Go Wiki. */ + +// ntz32Const is used by the functions NTZ and NLZ. +const ntz32Const = 0x04d7651f + +// ntz32Table is a helper table for de Bruijn algorithm by Danny Dubé. +// See Henry S. Warren, Jr. "Hacker's Delight" section 5-1 figure 5-26. +var ntz32Table = [32]int8{ + 0, 1, 2, 24, 3, 19, 6, 25, + 22, 4, 20, 10, 16, 7, 12, 26, + 31, 23, 18, 5, 21, 9, 15, 11, + 30, 17, 8, 14, 29, 13, 28, 27, +} + +// ntz32 computes the number of trailing zeros for an unsigned 32-bit integer. +func ntz32(x uint32) int { + if x == 0 { + return 32 + } + x = (x & -x) * ntz32Const + return int(ntz32Table[x>>27]) +} + +// nlz32 computes the number of leading zeros for an unsigned 32-bit integer. +func nlz32(x uint32) int { + // Smear left most bit to the right + x |= x >> 1 + x |= x >> 2 + x |= x >> 4 + x |= x >> 8 + x |= x >> 16 + // Use ntz mechanism to calculate nlz. + x++ + if x == 0 { + return 0 + } + x *= ntz32Const + return 32 - int(ntz32Table[x>>27]) +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/breader.go b/vendor/github.com/ulikunitz/xz/lzma/breader.go new file mode 100644 index 000000000..4ad09a14e --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/breader.go @@ -0,0 +1,39 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "io" +) + +// breader provides the ReadByte function for a Reader. It doesn't read +// more data from the reader than absolutely necessary. +type breader struct { + io.Reader + // helper slice to save allocations + p []byte +} + +// ByteReader converts an io.Reader into an io.ByteReader. +func ByteReader(r io.Reader) io.ByteReader { + br, ok := r.(io.ByteReader) + if !ok { + return &breader{r, make([]byte, 1)} + } + return br +} + +// ReadByte read byte function. +func (r *breader) ReadByte() (c byte, err error) { + n, err := r.Reader.Read(r.p) + if n < 1 { + if err == nil { + err = errors.New("breader.ReadByte: no data") + } + return 0, err + } + return r.p[0], nil +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/buffer.go b/vendor/github.com/ulikunitz/xz/lzma/buffer.go new file mode 100644 index 000000000..9cb7838ac --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/buffer.go @@ -0,0 +1,171 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" +) + +// buffer provides a circular buffer of bytes. If the front index equals +// the rear index the buffer is empty. As a consequence front cannot be +// equal rear for a full buffer. So a full buffer has a length that is +// one byte less the the length of the data slice. +type buffer struct { + data []byte + front int + rear int +} + +// newBuffer creates a buffer with the given size. +func newBuffer(size int) *buffer { + return &buffer{data: make([]byte, size+1)} +} + +// Cap returns the capacity of the buffer. +func (b *buffer) Cap() int { + return len(b.data) - 1 +} + +// Resets the buffer. The front and rear index are set to zero. +func (b *buffer) Reset() { + b.front = 0 + b.rear = 0 +} + +// Buffered returns the number of bytes buffered. +func (b *buffer) Buffered() int { + delta := b.front - b.rear + if delta < 0 { + delta += len(b.data) + } + return delta +} + +// Available returns the number of bytes available for writing. +func (b *buffer) Available() int { + delta := b.rear - 1 - b.front + if delta < 0 { + delta += len(b.data) + } + return delta +} + +// addIndex adds a non-negative integer to the index i and returns the +// resulting index. The function takes care of wrapping the index as +// well as potential overflow situations. +func (b *buffer) addIndex(i int, n int) int { + // subtraction of len(b.data) prevents overflow + i += n - len(b.data) + if i < 0 { + i += len(b.data) + } + return i +} + +// Read reads bytes from the buffer into p and returns the number of +// bytes read. The function never returns an error but might return less +// data than requested. +func (b *buffer) Read(p []byte) (n int, err error) { + n, err = b.Peek(p) + b.rear = b.addIndex(b.rear, n) + return n, err +} + +// Peek reads bytes from the buffer into p without changing the buffer. +// Peek will never return an error but might return less data than +// requested. +func (b *buffer) Peek(p []byte) (n int, err error) { + m := b.Buffered() + n = len(p) + if m < n { + n = m + p = p[:n] + } + k := copy(p, b.data[b.rear:]) + if k < n { + copy(p[k:], b.data) + } + return n, nil +} + +// Discard skips the n next bytes to read from the buffer, returning the +// bytes discarded. +// +// If Discards skips fewer than n bytes, it returns an error. +func (b *buffer) Discard(n int) (discarded int, err error) { + if n < 0 { + return 0, errors.New("buffer.Discard: negative argument") + } + m := b.Buffered() + if m < n { + n = m + err = errors.New( + "buffer.Discard: discarded less bytes then requested") + } + b.rear = b.addIndex(b.rear, n) + return n, err +} + +// ErrNoSpace indicates that there is insufficient space for the Write +// operation. +var ErrNoSpace = errors.New("insufficient space") + +// Write puts data into the buffer. If less bytes are written than +// requested ErrNoSpace is returned. +func (b *buffer) Write(p []byte) (n int, err error) { + m := b.Available() + n = len(p) + if m < n { + n = m + p = p[:m] + err = ErrNoSpace + } + k := copy(b.data[b.front:], p) + if k < n { + copy(b.data, p[k:]) + } + b.front = b.addIndex(b.front, n) + return n, err +} + +// WriteByte writes a single byte into the buffer. The error ErrNoSpace +// is returned if no single byte is available in the buffer for writing. +func (b *buffer) WriteByte(c byte) error { + if b.Available() < 1 { + return ErrNoSpace + } + b.data[b.front] = c + b.front = b.addIndex(b.front, 1) + return nil +} + +// prefixLen returns the length of the common prefix of a and b. +func prefixLen(a, b []byte) int { + if len(a) > len(b) { + a, b = b, a + } + for i, c := range a { + if b[i] != c { + return i + } + } + return len(a) +} + +// matchLen returns the length of the common prefix for the given +// distance from the rear and the byte slice p. +func (b *buffer) matchLen(distance int, p []byte) int { + var n int + i := b.rear - distance + if i < 0 { + if n = prefixLen(p, b.data[len(b.data)+i:]); n < -i { + return n + } + p = p[n:] + i = 0 + } + n += prefixLen(p, b.data[i:]) + return n +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/bytewriter.go b/vendor/github.com/ulikunitz/xz/lzma/bytewriter.go new file mode 100644 index 000000000..290606ddc --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/bytewriter.go @@ -0,0 +1,37 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "io" +) + +// ErrLimit indicates that the limit of the LimitedByteWriter has been +// reached. +var ErrLimit = errors.New("limit reached") + +// LimitedByteWriter provides a byte writer that can be written until a +// limit is reached. The field N provides the number of remaining +// bytes. +type LimitedByteWriter struct { + BW io.ByteWriter + N int64 +} + +// WriteByte writes a single byte to the limited byte writer. It returns +// ErrLimit if the limit has been reached. If the byte is successfully +// written the field N of the LimitedByteWriter will be decremented by +// one. +func (l *LimitedByteWriter) WriteByte(c byte) error { + if l.N <= 0 { + return ErrLimit + } + if err := l.BW.WriteByte(c); err != nil { + return err + } + l.N-- + return nil +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/decoder.go b/vendor/github.com/ulikunitz/xz/lzma/decoder.go new file mode 100644 index 000000000..e5a760a50 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/decoder.go @@ -0,0 +1,277 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" + "io" +) + +// decoder decodes a raw LZMA stream without any header. +type decoder struct { + // dictionary; the rear pointer of the buffer will be used for + // reading the data. + Dict *decoderDict + // decoder state + State *state + // range decoder + rd *rangeDecoder + // start stores the head value of the dictionary for the LZMA + // stream + start int64 + // size of uncompressed data + size int64 + // end-of-stream encountered + eos bool + // EOS marker found + eosMarker bool +} + +// newDecoder creates a new decoder instance. The parameter size provides +// the expected byte size of the decompressed data. If the size is +// unknown use a negative value. In that case the decoder will look for +// a terminating end-of-stream marker. +func newDecoder(br io.ByteReader, state *state, dict *decoderDict, size int64) (d *decoder, err error) { + rd, err := newRangeDecoder(br) + if err != nil { + return nil, err + } + d = &decoder{ + State: state, + Dict: dict, + rd: rd, + size: size, + start: dict.pos(), + } + return d, nil +} + +// Reopen restarts the decoder with a new byte reader and a new size. Reopen +// resets the Decompressed counter to zero. +func (d *decoder) Reopen(br io.ByteReader, size int64) error { + var err error + if d.rd, err = newRangeDecoder(br); err != nil { + return err + } + d.start = d.Dict.pos() + d.size = size + d.eos = false + return nil +} + +// decodeLiteral decodes a single literal from the LZMA stream. +func (d *decoder) decodeLiteral() (op operation, err error) { + litState := d.State.litState(d.Dict.byteAt(1), d.Dict.head) + match := d.Dict.byteAt(int(d.State.rep[0]) + 1) + s, err := d.State.litCodec.Decode(d.rd, d.State.state, match, litState) + if err != nil { + return nil, err + } + return lit{s}, nil +} + +// errEOS indicates that an EOS marker has been found. +var errEOS = errors.New("EOS marker found") + +// readOp decodes the next operation from the compressed stream. It +// returns the operation. If an explicit end of stream marker is +// identified the eos error is returned. +func (d *decoder) readOp() (op operation, err error) { + // Value of the end of stream (EOS) marker + const eosDist = 1<<32 - 1 + + state, state2, posState := d.State.states(d.Dict.head) + + b, err := d.State.isMatch[state2].Decode(d.rd) + if err != nil { + return nil, err + } + if b == 0 { + // literal + op, err := d.decodeLiteral() + if err != nil { + return nil, err + } + d.State.updateStateLiteral() + return op, nil + } + b, err = d.State.isRep[state].Decode(d.rd) + if err != nil { + return nil, err + } + if b == 0 { + // simple match + d.State.rep[3], d.State.rep[2], d.State.rep[1] = + d.State.rep[2], d.State.rep[1], d.State.rep[0] + + d.State.updateStateMatch() + // The length decoder returns the length offset. + n, err := d.State.lenCodec.Decode(d.rd, posState) + if err != nil { + return nil, err + } + // The dist decoder returns the distance offset. The actual + // distance is 1 higher. + d.State.rep[0], err = d.State.distCodec.Decode(d.rd, n) + if err != nil { + return nil, err + } + if d.State.rep[0] == eosDist { + d.eosMarker = true + return nil, errEOS + } + op = match{n: int(n) + minMatchLen, + distance: int64(d.State.rep[0]) + minDistance} + return op, nil + } + b, err = d.State.isRepG0[state].Decode(d.rd) + if err != nil { + return nil, err + } + dist := d.State.rep[0] + if b == 0 { + // rep match 0 + b, err = d.State.isRepG0Long[state2].Decode(d.rd) + if err != nil { + return nil, err + } + if b == 0 { + d.State.updateStateShortRep() + op = match{n: 1, distance: int64(dist) + minDistance} + return op, nil + } + } else { + b, err = d.State.isRepG1[state].Decode(d.rd) + if err != nil { + return nil, err + } + if b == 0 { + dist = d.State.rep[1] + } else { + b, err = d.State.isRepG2[state].Decode(d.rd) + if err != nil { + return nil, err + } + if b == 0 { + dist = d.State.rep[2] + } else { + dist = d.State.rep[3] + d.State.rep[3] = d.State.rep[2] + } + d.State.rep[2] = d.State.rep[1] + } + d.State.rep[1] = d.State.rep[0] + d.State.rep[0] = dist + } + n, err := d.State.repLenCodec.Decode(d.rd, posState) + if err != nil { + return nil, err + } + d.State.updateStateRep() + op = match{n: int(n) + minMatchLen, distance: int64(dist) + minDistance} + return op, nil +} + +// apply takes the operation and transforms the decoder dictionary accordingly. +func (d *decoder) apply(op operation) error { + var err error + switch x := op.(type) { + case match: + err = d.Dict.writeMatch(x.distance, x.n) + case lit: + err = d.Dict.WriteByte(x.b) + default: + panic("op is neither a match nor a literal") + } + return err +} + +// decompress fills the dictionary unless no space for new data is +// available. If the end of the LZMA stream has been reached io.EOF will +// be returned. +func (d *decoder) decompress() error { + if d.eos { + return io.EOF + } + for d.Dict.Available() >= maxMatchLen { + op, err := d.readOp() + switch err { + case nil: + break + case errEOS: + d.eos = true + if !d.rd.possiblyAtEnd() { + return errDataAfterEOS + } + if d.size >= 0 && d.size != d.Decompressed() { + return errSize + } + return io.EOF + case io.EOF: + d.eos = true + return io.ErrUnexpectedEOF + default: + return err + } + if err = d.apply(op); err != nil { + return err + } + if d.size >= 0 && d.Decompressed() >= d.size { + d.eos = true + if d.Decompressed() > d.size { + return errSize + } + if !d.rd.possiblyAtEnd() { + switch _, err = d.readOp(); err { + case nil: + return errSize + case io.EOF: + return io.ErrUnexpectedEOF + case errEOS: + break + default: + return err + } + } + return io.EOF + } + } + return nil +} + +// Errors that may be returned while decoding data. +var ( + errDataAfterEOS = errors.New("lzma: data after end of stream marker") + errSize = errors.New("lzma: wrong uncompressed data size") +) + +// Read reads data from the buffer. If no more data is available io.EOF is +// returned. +func (d *decoder) Read(p []byte) (n int, err error) { + var k int + for { + // Read of decoder dict never returns an error. + k, err = d.Dict.Read(p[n:]) + if err != nil { + panic(fmt.Errorf("dictionary read error %s", err)) + } + if k == 0 && d.eos { + return n, io.EOF + } + n += k + if n >= len(p) { + return n, nil + } + if err = d.decompress(); err != nil && err != io.EOF { + return n, err + } + } +} + +// Decompressed returns the number of bytes decompressed by the decoder. +func (d *decoder) Decompressed() int64 { + return d.Dict.pos() - d.start +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/decoderdict.go b/vendor/github.com/ulikunitz/xz/lzma/decoderdict.go new file mode 100644 index 000000000..ba06712b0 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/decoderdict.go @@ -0,0 +1,135 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" +) + +// decoderDict provides the dictionary for the decoder. The whole +// dictionary is used as reader buffer. +type decoderDict struct { + buf buffer + head int64 +} + +// newDecoderDict creates a new decoder dictionary. The whole dictionary +// will be used as reader buffer. +func newDecoderDict(dictCap int) (d *decoderDict, err error) { + // lower limit supports easy test cases + if !(1 <= dictCap && int64(dictCap) <= MaxDictCap) { + return nil, errors.New("lzma: dictCap out of range") + } + d = &decoderDict{buf: *newBuffer(dictCap)} + return d, nil +} + +// Reset clears the dictionary. The read buffer is not changed, so the +// buffered data can still be read. +func (d *decoderDict) Reset() { + d.head = 0 +} + +// WriteByte writes a single byte into the dictionary. It is used to +// write literals into the dictionary. +func (d *decoderDict) WriteByte(c byte) error { + if err := d.buf.WriteByte(c); err != nil { + return err + } + d.head++ + return nil +} + +// pos returns the position of the dictionary head. +func (d *decoderDict) pos() int64 { return d.head } + +// dictLen returns the actual length of the dictionary. +func (d *decoderDict) dictLen() int { + capacity := d.buf.Cap() + if d.head >= int64(capacity) { + return capacity + } + return int(d.head) +} + +// byteAt returns a byte stored in the dictionary. If the distance is +// non-positive or exceeds the current length of the dictionary the zero +// byte is returned. +func (d *decoderDict) byteAt(dist int) byte { + if !(0 < dist && dist <= d.dictLen()) { + return 0 + } + i := d.buf.front - dist + if i < 0 { + i += len(d.buf.data) + } + return d.buf.data[i] +} + +// writeMatch writes the match at the top of the dictionary. The given +// distance must point in the current dictionary and the length must not +// exceed the maximum length 273 supported in LZMA. +// +// The error value ErrNoSpace indicates that no space is available in +// the dictionary for writing. You need to read from the dictionary +// first. +func (d *decoderDict) writeMatch(dist int64, length int) error { + if !(0 < dist && dist <= int64(d.dictLen())) { + return errors.New("writeMatch: distance out of range") + } + if !(0 < length && length <= maxMatchLen) { + return errors.New("writeMatch: length out of range") + } + if length > d.buf.Available() { + return ErrNoSpace + } + d.head += int64(length) + + i := d.buf.front - int(dist) + if i < 0 { + i += len(d.buf.data) + } + for length > 0 { + var p []byte + if i >= d.buf.front { + p = d.buf.data[i:] + i = 0 + } else { + p = d.buf.data[i:d.buf.front] + i = d.buf.front + } + if len(p) > length { + p = p[:length] + } + if _, err := d.buf.Write(p); err != nil { + panic(fmt.Errorf("d.buf.Write returned error %s", err)) + } + length -= len(p) + } + return nil +} + +// Write writes the given bytes into the dictionary and advances the +// head. +func (d *decoderDict) Write(p []byte) (n int, err error) { + n, err = d.buf.Write(p) + d.head += int64(n) + return n, err +} + +// Available returns the number of available bytes for writing into the +// decoder dictionary. +func (d *decoderDict) Available() int { return d.buf.Available() } + +// Read reads data from the buffer contained in the decoder dictionary. +func (d *decoderDict) Read(p []byte) (n int, err error) { return d.buf.Read(p) } + +// Buffered returns the number of bytes currently buffered in the +// decoder dictionary. +func (d *decoderDict) buffered() int { return d.buf.Buffered() } + +// Peek gets data from the buffer without advancing the rear index. +func (d *decoderDict) peek(p []byte) (n int, err error) { return d.buf.Peek(p) } diff --git a/vendor/github.com/ulikunitz/xz/lzma/directcodec.go b/vendor/github.com/ulikunitz/xz/lzma/directcodec.go new file mode 100644 index 000000000..e6e0c6ddf --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/directcodec.go @@ -0,0 +1,49 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import "fmt" + +// directCodec allows the encoding and decoding of values with a fixed number +// of bits. The number of bits must be in the range [1,32]. +type directCodec byte + +// makeDirectCodec creates a directCodec. The function panics if the number of +// bits is not in the range [1,32]. +func makeDirectCodec(bits int) directCodec { + if !(1 <= bits && bits <= 32) { + panic(fmt.Errorf("bits=%d out of range", bits)) + } + return directCodec(bits) +} + +// Bits returns the number of bits supported by this codec. +func (dc directCodec) Bits() int { + return int(dc) +} + +// Encode uses the range encoder to encode a value with the fixed number of +// bits. The most-significant bit is encoded first. +func (dc directCodec) Encode(e *rangeEncoder, v uint32) error { + for i := int(dc) - 1; i >= 0; i-- { + if err := e.DirectEncodeBit(v >> uint(i)); err != nil { + return err + } + } + return nil +} + +// Decode uses the range decoder to decode a value with the given number of +// given bits. The most-significant bit is decoded first. +func (dc directCodec) Decode(d *rangeDecoder) (v uint32, err error) { + for i := int(dc) - 1; i >= 0; i-- { + x, err := d.DirectDecodeBit() + if err != nil { + return 0, err + } + v = (v << 1) | x + } + return v, nil +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/distcodec.go b/vendor/github.com/ulikunitz/xz/lzma/distcodec.go new file mode 100644 index 000000000..69871c04a --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/distcodec.go @@ -0,0 +1,156 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +// Constants used by the distance codec. +const ( + // minimum supported distance + minDistance = 1 + // maximum supported distance, value is used for the eos marker. + maxDistance = 1 << 32 + // number of the supported len states + lenStates = 4 + // start for the position models + startPosModel = 4 + // first index with align bits support + endPosModel = 14 + // bits for the position slots + posSlotBits = 6 + // number of align bits + alignBits = 4 + // maximum position slot + maxPosSlot = 63 +) + +// distCodec provides encoding and decoding of distance values. +type distCodec struct { + posSlotCodecs [lenStates]treeCodec + posModel [endPosModel - startPosModel]treeReverseCodec + alignCodec treeReverseCodec +} + +// deepcopy initializes dc as deep copy of the source. +func (dc *distCodec) deepcopy(src *distCodec) { + if dc == src { + return + } + for i := range dc.posSlotCodecs { + dc.posSlotCodecs[i].deepcopy(&src.posSlotCodecs[i]) + } + for i := range dc.posModel { + dc.posModel[i].deepcopy(&src.posModel[i]) + } + dc.alignCodec.deepcopy(&src.alignCodec) +} + +// distBits returns the number of bits required to encode dist. +func distBits(dist uint32) int { + if dist < startPosModel { + return 6 + } + // slot s > 3, dist d + // s = 2(bits(d)-1) + bit(d, bits(d)-2) + // s>>1 = bits(d)-1 + // bits(d) = 32-nlz32(d) + // s>>1=31-nlz32(d) + // n = 5 + (s>>1) = 36 - nlz32(d) + return 36 - nlz32(dist) +} + +// newDistCodec creates a new distance codec. +func (dc *distCodec) init() { + for i := range dc.posSlotCodecs { + dc.posSlotCodecs[i] = makeTreeCodec(posSlotBits) + } + for i := range dc.posModel { + posSlot := startPosModel + i + bits := (posSlot >> 1) - 1 + dc.posModel[i] = makeTreeReverseCodec(bits) + } + dc.alignCodec = makeTreeReverseCodec(alignBits) +} + +// lenState converts the value l to a supported lenState value. +func lenState(l uint32) uint32 { + if l >= lenStates { + l = lenStates - 1 + } + return l +} + +// Encode encodes the distance using the parameter l. Dist can have values from +// the full range of uint32 values. To get the distance offset the actual match +// distance has to be decreased by 1. A distance offset of 0xffffffff (eos) +// indicates the end of the stream. +func (dc *distCodec) Encode(e *rangeEncoder, dist uint32, l uint32) (err error) { + // Compute the posSlot using nlz32 + var posSlot uint32 + var bits uint32 + if dist < startPosModel { + posSlot = dist + } else { + bits = uint32(30 - nlz32(dist)) + posSlot = startPosModel - 2 + (bits << 1) + posSlot += (dist >> uint(bits)) & 1 + } + + if err = dc.posSlotCodecs[lenState(l)].Encode(e, posSlot); err != nil { + return + } + + switch { + case posSlot < startPosModel: + return nil + case posSlot < endPosModel: + tc := &dc.posModel[posSlot-startPosModel] + return tc.Encode(dist, e) + } + dic := directCodec(bits - alignBits) + if err = dic.Encode(e, dist>>alignBits); err != nil { + return + } + return dc.alignCodec.Encode(dist, e) +} + +// Decode decodes the distance offset using the parameter l. The dist value +// 0xffffffff (eos) indicates the end of the stream. Add one to the distance +// offset to get the actual match distance. +func (dc *distCodec) Decode(d *rangeDecoder, l uint32) (dist uint32, err error) { + posSlot, err := dc.posSlotCodecs[lenState(l)].Decode(d) + if err != nil { + return + } + + // posSlot equals distance + if posSlot < startPosModel { + return posSlot, nil + } + + // posSlot uses the individual models + bits := (posSlot >> 1) - 1 + dist = (2 | (posSlot & 1)) << bits + var u uint32 + if posSlot < endPosModel { + tc := &dc.posModel[posSlot-startPosModel] + if u, err = tc.Decode(d); err != nil { + return 0, err + } + dist += u + return dist, nil + } + + // posSlots use direct encoding and a single model for the four align + // bits. + dic := directCodec(bits - alignBits) + if u, err = dic.Decode(d); err != nil { + return 0, err + } + dist += u << alignBits + if u, err = dc.alignCodec.Decode(d); err != nil { + return 0, err + } + dist += u + return dist, nil +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/encoder.go b/vendor/github.com/ulikunitz/xz/lzma/encoder.go new file mode 100644 index 000000000..59055eb64 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/encoder.go @@ -0,0 +1,268 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "fmt" + "io" +) + +// opLenMargin provides the upper limit of the number of bytes required +// to encode a single operation. +const opLenMargin = 16 + +// compressFlags control the compression process. +type compressFlags uint32 + +// Values for compressFlags. +const ( + // all data should be compressed, even if compression is not + // optimal. + all compressFlags = 1 << iota +) + +// encoderFlags provide the flags for an encoder. +type encoderFlags uint32 + +// Flags for the encoder. +const ( + // eosMarker requests an EOS marker to be written. + eosMarker encoderFlags = 1 << iota +) + +// Encoder compresses data buffered in the encoder dictionary and writes +// it into a byte writer. +type encoder struct { + dict *encoderDict + state *state + re *rangeEncoder + start int64 + // generate eos marker + marker bool + limit bool + margin int +} + +// newEncoder creates a new encoder. If the byte writer must be +// limited use LimitedByteWriter provided by this package. The flags +// argument supports the eosMarker flag, controlling whether a +// terminating end-of-stream marker must be written. +func newEncoder(bw io.ByteWriter, state *state, dict *encoderDict, + flags encoderFlags) (e *encoder, err error) { + + re, err := newRangeEncoder(bw) + if err != nil { + return nil, err + } + e = &encoder{ + dict: dict, + state: state, + re: re, + marker: flags&eosMarker != 0, + start: dict.Pos(), + margin: opLenMargin, + } + if e.marker { + e.margin += 5 + } + return e, nil +} + +// Write writes the bytes from p into the dictionary. If not enough +// space is available the data in the dictionary buffer will be +// compressed to make additional space available. If the limit of the +// underlying writer has been reached ErrLimit will be returned. +func (e *encoder) Write(p []byte) (n int, err error) { + for { + k, err := e.dict.Write(p[n:]) + n += k + if err == ErrNoSpace { + if err = e.compress(0); err != nil { + return n, err + } + continue + } + return n, err + } +} + +// Reopen reopens the encoder with a new byte writer. +func (e *encoder) Reopen(bw io.ByteWriter) error { + var err error + if e.re, err = newRangeEncoder(bw); err != nil { + return err + } + e.start = e.dict.Pos() + e.limit = false + return nil +} + +// writeLiteral writes a literal into the LZMA stream +func (e *encoder) writeLiteral(l lit) error { + var err error + state, state2, _ := e.state.states(e.dict.Pos()) + if err = e.state.isMatch[state2].Encode(e.re, 0); err != nil { + return err + } + litState := e.state.litState(e.dict.ByteAt(1), e.dict.Pos()) + match := e.dict.ByteAt(int(e.state.rep[0]) + 1) + err = e.state.litCodec.Encode(e.re, l.b, state, match, litState) + if err != nil { + return err + } + e.state.updateStateLiteral() + return nil +} + +// iverson implements the Iverson operator as proposed by Donald Knuth in his +// book Concrete Mathematics. +func iverson(ok bool) uint32 { + if ok { + return 1 + } + return 0 +} + +// writeMatch writes a repetition operation into the operation stream +func (e *encoder) writeMatch(m match) error { + var err error + if !(minDistance <= m.distance && m.distance <= maxDistance) { + panic(fmt.Errorf("match distance %d out of range", m.distance)) + } + dist := uint32(m.distance - minDistance) + if !(minMatchLen <= m.n && m.n <= maxMatchLen) && + !(dist == e.state.rep[0] && m.n == 1) { + panic(fmt.Errorf( + "match length %d out of range; dist %d rep[0] %d", + m.n, dist, e.state.rep[0])) + } + state, state2, posState := e.state.states(e.dict.Pos()) + if err = e.state.isMatch[state2].Encode(e.re, 1); err != nil { + return err + } + g := 0 + for ; g < 4; g++ { + if e.state.rep[g] == dist { + break + } + } + b := iverson(g < 4) + if err = e.state.isRep[state].Encode(e.re, b); err != nil { + return err + } + n := uint32(m.n - minMatchLen) + if b == 0 { + // simple match + e.state.rep[3], e.state.rep[2], e.state.rep[1], e.state.rep[0] = + e.state.rep[2], e.state.rep[1], e.state.rep[0], dist + e.state.updateStateMatch() + if err = e.state.lenCodec.Encode(e.re, n, posState); err != nil { + return err + } + return e.state.distCodec.Encode(e.re, dist, n) + } + b = iverson(g != 0) + if err = e.state.isRepG0[state].Encode(e.re, b); err != nil { + return err + } + if b == 0 { + // g == 0 + b = iverson(m.n != 1) + if err = e.state.isRepG0Long[state2].Encode(e.re, b); err != nil { + return err + } + if b == 0 { + e.state.updateStateShortRep() + return nil + } + } else { + // g in {1,2,3} + b = iverson(g != 1) + if err = e.state.isRepG1[state].Encode(e.re, b); err != nil { + return err + } + if b == 1 { + // g in {2,3} + b = iverson(g != 2) + err = e.state.isRepG2[state].Encode(e.re, b) + if err != nil { + return err + } + if b == 1 { + e.state.rep[3] = e.state.rep[2] + } + e.state.rep[2] = e.state.rep[1] + } + e.state.rep[1] = e.state.rep[0] + e.state.rep[0] = dist + } + e.state.updateStateRep() + return e.state.repLenCodec.Encode(e.re, n, posState) +} + +// writeOp writes a single operation to the range encoder. The function +// checks whether there is enough space available to close the LZMA +// stream. +func (e *encoder) writeOp(op operation) error { + if e.re.Available() < int64(e.margin) { + return ErrLimit + } + switch x := op.(type) { + case lit: + return e.writeLiteral(x) + case match: + return e.writeMatch(x) + default: + panic("unexpected operation") + } +} + +// compress compressed data from the dictionary buffer. If the flag all +// is set, all data in the dictionary buffer will be compressed. The +// function returns ErrLimit if the underlying writer has reached its +// limit. +func (e *encoder) compress(flags compressFlags) error { + n := 0 + if flags&all == 0 { + n = maxMatchLen - 1 + } + d := e.dict + m := d.m + for d.Buffered() > n { + op := m.NextOp(e.state.rep) + if err := e.writeOp(op); err != nil { + return err + } + d.Discard(op.Len()) + } + return nil +} + +// eosMatch is a pseudo operation that indicates the end of the stream. +var eosMatch = match{distance: maxDistance, n: minMatchLen} + +// Close terminates the LZMA stream. If requested the end-of-stream +// marker will be written. If the byte writer limit has been or will be +// reached during compression of the remaining data in the buffer the +// LZMA stream will be closed and data will remain in the buffer. +func (e *encoder) Close() error { + err := e.compress(all) + if err != nil && err != ErrLimit { + return err + } + if e.marker { + if err := e.writeMatch(eosMatch); err != nil { + return err + } + } + err = e.re.Close() + return err +} + +// Compressed returns the number bytes of the input data that been +// compressed. +func (e *encoder) Compressed() int64 { + return e.dict.Pos() - e.start +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/encoderdict.go b/vendor/github.com/ulikunitz/xz/lzma/encoderdict.go new file mode 100644 index 000000000..40f3d3f64 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/encoderdict.go @@ -0,0 +1,149 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" + "io" +) + +// matcher is an interface that supports the identification of the next +// operation. +type matcher interface { + io.Writer + SetDict(d *encoderDict) + NextOp(rep [4]uint32) operation +} + +// encoderDict provides the dictionary of the encoder. It includes an +// addtional buffer atop of the actual dictionary. +type encoderDict struct { + buf buffer + m matcher + head int64 + capacity int + // preallocated array + data [maxMatchLen]byte +} + +// newEncoderDict creates the encoder dictionary. The argument bufSize +// defines the size of the additional buffer. +func newEncoderDict(dictCap, bufSize int, m matcher) (d *encoderDict, err error) { + if !(1 <= dictCap && int64(dictCap) <= MaxDictCap) { + return nil, errors.New( + "lzma: dictionary capacity out of range") + } + if bufSize < 1 { + return nil, errors.New( + "lzma: buffer size must be larger than zero") + } + d = &encoderDict{ + buf: *newBuffer(dictCap + bufSize), + capacity: dictCap, + m: m, + } + m.SetDict(d) + return d, nil +} + +// Discard discards n bytes. Note that n must not be larger than +// MaxMatchLen. +func (d *encoderDict) Discard(n int) { + p := d.data[:n] + k, _ := d.buf.Read(p) + if k < n { + panic(fmt.Errorf("lzma: can't discard %d bytes", n)) + } + d.head += int64(n) + d.m.Write(p) +} + +// Len returns the data available in the encoder dictionary. +func (d *encoderDict) Len() int { + n := d.buf.Available() + if int64(n) > d.head { + return int(d.head) + } + return n +} + +// DictLen returns the actual length of data in the dictionary. +func (d *encoderDict) DictLen() int { + if d.head < int64(d.capacity) { + return int(d.head) + } + return d.capacity +} + +// Available returns the number of bytes that can be written by a +// following Write call. +func (d *encoderDict) Available() int { + return d.buf.Available() - d.DictLen() +} + +// Write writes data into the dictionary buffer. Note that the position +// of the dictionary head will not be moved. If there is not enough +// space in the buffer ErrNoSpace will be returned. +func (d *encoderDict) Write(p []byte) (n int, err error) { + m := d.Available() + if len(p) > m { + p = p[:m] + err = ErrNoSpace + } + var e error + if n, e = d.buf.Write(p); e != nil { + err = e + } + return n, err +} + +// Pos returns the position of the head. +func (d *encoderDict) Pos() int64 { return d.head } + +// ByteAt returns the byte at the given distance. +func (d *encoderDict) ByteAt(distance int) byte { + if !(0 < distance && distance <= d.Len()) { + return 0 + } + i := d.buf.rear - distance + if i < 0 { + i += len(d.buf.data) + } + return d.buf.data[i] +} + +// CopyN copies the last n bytes from the dictionary into the provided +// writer. This is used for copying uncompressed data into an +// uncompressed segment. +func (d *encoderDict) CopyN(w io.Writer, n int) (written int, err error) { + if n <= 0 { + return 0, nil + } + m := d.Len() + if n > m { + n = m + err = ErrNoSpace + } + i := d.buf.rear - n + var e error + if i < 0 { + i += len(d.buf.data) + if written, e = w.Write(d.buf.data[i:]); e != nil { + return written, e + } + i = 0 + } + var k int + k, e = w.Write(d.buf.data[i:d.buf.rear]) + written += k + if e != nil { + err = e + } + return written, err +} + +// Buffered returns the number of bytes in the buffer. +func (d *encoderDict) Buffered() int { return d.buf.Buffered() } diff --git a/vendor/github.com/ulikunitz/xz/lzma/fox.lzma b/vendor/github.com/ulikunitz/xz/lzma/fox.lzma new file mode 100644 index 000000000..5edad6332 Binary files /dev/null and b/vendor/github.com/ulikunitz/xz/lzma/fox.lzma differ diff --git a/vendor/github.com/ulikunitz/xz/lzma/hashtable.go b/vendor/github.com/ulikunitz/xz/lzma/hashtable.go new file mode 100644 index 000000000..e82970eac --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/hashtable.go @@ -0,0 +1,309 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" + + "github.com/ulikunitz/xz/internal/hash" +) + +/* For compression we need to find byte sequences that match the byte + * sequence at the dictionary head. A hash table is a simple method to + * provide this capability. + */ + +// maxMatches limits the number of matches requested from the Matches +// function. This controls the speed of the overall encoding. +const maxMatches = 16 + +// shortDists defines the number of short distances supported by the +// implementation. +const shortDists = 8 + +// The minimum is somehow arbitrary but the maximum is limited by the +// memory requirements of the hash table. +const ( + minTableExponent = 9 + maxTableExponent = 20 +) + +// newRoller contains the function used to create an instance of the +// hash.Roller. +var newRoller = func(n int) hash.Roller { return hash.NewCyclicPoly(n) } + +// hashTable stores the hash table including the rolling hash method. +// +// We implement chained hashing into a circular buffer. Each entry in +// the circular buffer stores the delta distance to the next position with a +// word that has the same hash value. +type hashTable struct { + dict *encoderDict + // actual hash table + t []int64 + // circular list data with the offset to the next word + data []uint32 + front int + // mask for computing the index for the hash table + mask uint64 + // hash offset; initial value is -int64(wordLen) + hoff int64 + // length of the hashed word + wordLen int + // hash roller for computing the hash values for the Write + // method + wr hash.Roller + // hash roller for computing arbitrary hashes + hr hash.Roller + // preallocated slices + p [maxMatches]int64 + distances [maxMatches + shortDists]int +} + +// hashTableExponent derives the hash table exponent from the dictionary +// capacity. +func hashTableExponent(n uint32) int { + e := 30 - nlz32(n) + switch { + case e < minTableExponent: + e = minTableExponent + case e > maxTableExponent: + e = maxTableExponent + } + return e +} + +// newHashTable creates a new hash table for words of length wordLen +func newHashTable(capacity int, wordLen int) (t *hashTable, err error) { + if !(0 < capacity) { + return nil, errors.New( + "newHashTable: capacity must not be negative") + } + exp := hashTableExponent(uint32(capacity)) + if !(1 <= wordLen && wordLen <= 4) { + return nil, errors.New("newHashTable: " + + "argument wordLen out of range") + } + n := 1 << uint(exp) + if n <= 0 { + panic("newHashTable: exponent is too large") + } + t = &hashTable{ + t: make([]int64, n), + data: make([]uint32, capacity), + mask: (uint64(1) << uint(exp)) - 1, + hoff: -int64(wordLen), + wordLen: wordLen, + wr: newRoller(wordLen), + hr: newRoller(wordLen), + } + return t, nil +} + +func (t *hashTable) SetDict(d *encoderDict) { t.dict = d } + +// buffered returns the number of bytes that are currently hashed. +func (t *hashTable) buffered() int { + n := t.hoff + 1 + switch { + case n <= 0: + return 0 + case n >= int64(len(t.data)): + return len(t.data) + } + return int(n) +} + +// addIndex adds n to an index ensuring that is stays inside the +// circular buffer for the hash chain. +func (t *hashTable) addIndex(i, n int) int { + i += n - len(t.data) + if i < 0 { + i += len(t.data) + } + return i +} + +// putDelta puts the delta instance at the current front of the circular +// chain buffer. +func (t *hashTable) putDelta(delta uint32) { + t.data[t.front] = delta + t.front = t.addIndex(t.front, 1) +} + +// putEntry puts a new entry into the hash table. If there is already a +// value stored it is moved into the circular chain buffer. +func (t *hashTable) putEntry(h uint64, pos int64) { + if pos < 0 { + return + } + i := h & t.mask + old := t.t[i] - 1 + t.t[i] = pos + 1 + var delta int64 + if old >= 0 { + delta = pos - old + if delta > 1<<32-1 || delta > int64(t.buffered()) { + delta = 0 + } + } + t.putDelta(uint32(delta)) +} + +// WriteByte converts a single byte into a hash and puts them into the hash +// table. +func (t *hashTable) WriteByte(b byte) error { + h := t.wr.RollByte(b) + t.hoff++ + t.putEntry(h, t.hoff) + return nil +} + +// Write converts the bytes provided into hash tables and stores the +// abbreviated offsets into the hash table. The method will never return an +// error. +func (t *hashTable) Write(p []byte) (n int, err error) { + for _, b := range p { + // WriteByte doesn't generate an error. + t.WriteByte(b) + } + return len(p), nil +} + +// getMatches the matches for a specific hash. The functions returns the +// number of positions found. +// +// TODO: Make a getDistances because that we are actually interested in. +func (t *hashTable) getMatches(h uint64, positions []int64) (n int) { + if t.hoff < 0 || len(positions) == 0 { + return 0 + } + buffered := t.buffered() + tailPos := t.hoff + 1 - int64(buffered) + rear := t.front - buffered + if rear >= 0 { + rear -= len(t.data) + } + // get the slot for the hash + pos := t.t[h&t.mask] - 1 + delta := pos - tailPos + for { + if delta < 0 { + return n + } + positions[n] = tailPos + delta + n++ + if n >= len(positions) { + return n + } + i := rear + int(delta) + if i < 0 { + i += len(t.data) + } + u := t.data[i] + if u == 0 { + return n + } + delta -= int64(u) + } +} + +// hash computes the rolling hash for the word stored in p. For correct +// results its length must be equal to t.wordLen. +func (t *hashTable) hash(p []byte) uint64 { + var h uint64 + for _, b := range p { + h = t.hr.RollByte(b) + } + return h +} + +// Matches fills the positions slice with potential matches. The +// functions returns the number of positions filled into positions. The +// byte slice p must have word length of the hash table. +func (t *hashTable) Matches(p []byte, positions []int64) int { + if len(p) != t.wordLen { + panic(fmt.Errorf( + "byte slice must have length %d", t.wordLen)) + } + h := t.hash(p) + return t.getMatches(h, positions) +} + +// NextOp identifies the next operation using the hash table. +// +// TODO: Use all repetitions to find matches. +func (t *hashTable) NextOp(rep [4]uint32) operation { + // get positions + data := t.dict.data[:maxMatchLen] + n, _ := t.dict.buf.Peek(data) + data = data[:n] + var p []int64 + if n < t.wordLen { + p = t.p[:0] + } else { + p = t.p[:maxMatches] + n = t.Matches(data[:t.wordLen], p) + p = p[:n] + } + + // convert positions in potential distances + head := t.dict.head + dists := append(t.distances[:0], 1, 2, 3, 4, 5, 6, 7, 8) + for _, pos := range p { + dis := int(head - pos) + if dis > shortDists { + dists = append(dists, dis) + } + } + + // check distances + var m match + dictLen := t.dict.DictLen() + for _, dist := range dists { + if dist > dictLen { + continue + } + + // Here comes a trick. We are only interested in matches + // that are longer than the matches we have been found + // before. So before we test the whole byte sequence at + // the given distance, we test the first byte that would + // make the match longer. If it doesn't match the byte + // to match, we don't to care any longer. + i := t.dict.buf.rear - dist + m.n + if i < 0 { + i += len(t.dict.buf.data) + } + if t.dict.buf.data[i] != data[m.n] { + // We can't get a longer match. Jump to the next + // distance. + continue + } + + n := t.dict.buf.matchLen(dist, data) + switch n { + case 0: + continue + case 1: + if uint32(dist-minDistance) != rep[0] { + continue + } + } + if n > m.n { + m = match{int64(dist), n} + if n == len(data) { + // No better match will be found. + break + } + } + } + + if m.n == 0 { + return lit{data[0]} + } + return m +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/header.go b/vendor/github.com/ulikunitz/xz/lzma/header.go new file mode 100644 index 000000000..cda39462c --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/header.go @@ -0,0 +1,167 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" +) + +// uint32LE reads an uint32 integer from a byte slice +func uint32LE(b []byte) uint32 { + x := uint32(b[3]) << 24 + x |= uint32(b[2]) << 16 + x |= uint32(b[1]) << 8 + x |= uint32(b[0]) + return x +} + +// uint64LE converts the uint64 value stored as little endian to an uint64 +// value. +func uint64LE(b []byte) uint64 { + x := uint64(b[7]) << 56 + x |= uint64(b[6]) << 48 + x |= uint64(b[5]) << 40 + x |= uint64(b[4]) << 32 + x |= uint64(b[3]) << 24 + x |= uint64(b[2]) << 16 + x |= uint64(b[1]) << 8 + x |= uint64(b[0]) + return x +} + +// putUint32LE puts an uint32 integer into a byte slice that must have at least +// a length of 4 bytes. +func putUint32LE(b []byte, x uint32) { + b[0] = byte(x) + b[1] = byte(x >> 8) + b[2] = byte(x >> 16) + b[3] = byte(x >> 24) +} + +// putUint64LE puts the uint64 value into the byte slice as little endian +// value. The byte slice b must have at least place for 8 bytes. +func putUint64LE(b []byte, x uint64) { + b[0] = byte(x) + b[1] = byte(x >> 8) + b[2] = byte(x >> 16) + b[3] = byte(x >> 24) + b[4] = byte(x >> 32) + b[5] = byte(x >> 40) + b[6] = byte(x >> 48) + b[7] = byte(x >> 56) +} + +// noHeaderSize defines the value of the length field in the LZMA header. +const noHeaderSize uint64 = 1<<64 - 1 + +// HeaderLen provides the length of the LZMA file header. +const HeaderLen = 13 + +// header represents the header of an LZMA file. +type header struct { + properties Properties + dictCap int + // uncompressed size; negative value if no size is given + size int64 +} + +// marshalBinary marshals the header. +func (h *header) marshalBinary() (data []byte, err error) { + if err = h.properties.verify(); err != nil { + return nil, err + } + if !(0 <= h.dictCap && int64(h.dictCap) <= MaxDictCap) { + return nil, fmt.Errorf("lzma: DictCap %d out of range", + h.dictCap) + } + + data = make([]byte, 13) + + // property byte + data[0] = h.properties.Code() + + // dictionary capacity + putUint32LE(data[1:5], uint32(h.dictCap)) + + // uncompressed size + var s uint64 + if h.size > 0 { + s = uint64(h.size) + } else { + s = noHeaderSize + } + putUint64LE(data[5:], s) + + return data, nil +} + +// unmarshalBinary unmarshals the header. +func (h *header) unmarshalBinary(data []byte) error { + if len(data) != HeaderLen { + return errors.New("lzma.unmarshalBinary: data has wrong length") + } + + // properties + var err error + if h.properties, err = PropertiesForCode(data[0]); err != nil { + return err + } + + // dictionary capacity + h.dictCap = int(uint32LE(data[1:])) + if h.dictCap < 0 { + return errors.New( + "LZMA header: dictionary capacity exceeds maximum " + + "integer") + } + + // uncompressed size + s := uint64LE(data[5:]) + if s == noHeaderSize { + h.size = -1 + } else { + h.size = int64(s) + if h.size < 0 { + return errors.New( + "LZMA header: uncompressed size " + + "out of int64 range") + } + } + + return nil +} + +// validDictCap checks whether the dictionary capacity is correct. This +// is used to weed out wrong file headers. +func validDictCap(dictcap int) bool { + if int64(dictcap) == MaxDictCap { + return true + } + for n := uint(10); n < 32; n++ { + if dictcap == 1<= 10 or 2^32-1. If +// there is an explicit size it must not exceed 256 GiB. The length of +// the data argument must be HeaderLen. +func ValidHeader(data []byte) bool { + var h header + if err := h.unmarshalBinary(data); err != nil { + return false + } + if !validDictCap(h.dictCap) { + return false + } + return h.size < 0 || h.size <= 1<<38 +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/header2.go b/vendor/github.com/ulikunitz/xz/lzma/header2.go new file mode 100644 index 000000000..cd148812c --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/header2.go @@ -0,0 +1,398 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" + "io" +) + +const ( + // maximum size of compressed data in a chunk + maxCompressed = 1 << 16 + // maximum size of uncompressed data in a chunk + maxUncompressed = 1 << 21 +) + +// chunkType represents the type of an LZMA2 chunk. Note that this +// value is an internal representation and no actual encoding of a LZMA2 +// chunk header. +type chunkType byte + +// Possible values for the chunk type. +const ( + // end of stream + cEOS chunkType = iota + // uncompressed; reset dictionary + cUD + // uncompressed; no reset of dictionary + cU + // LZMA compressed; no reset + cL + // LZMA compressed; reset state + cLR + // LZMA compressed; reset state; new property value + cLRN + // LZMA compressed; reset state; new property value; reset dictionary + cLRND +) + +// chunkTypeStrings provide a string representation for the chunk types. +var chunkTypeStrings = [...]string{ + cEOS: "EOS", + cU: "U", + cUD: "UD", + cL: "L", + cLR: "LR", + cLRN: "LRN", + cLRND: "LRND", +} + +// String returns a string representation of the chunk type. +func (c chunkType) String() string { + if !(cEOS <= c && c <= cLRND) { + return "unknown" + } + return chunkTypeStrings[c] +} + +// Actual encodings for the chunk types in the value. Note that the high +// uncompressed size bits are stored in the header byte additionally. +const ( + hEOS = 0 + hUD = 1 + hU = 2 + hL = 1 << 7 + hLR = 1<<7 | 1<<5 + hLRN = 1<<7 | 1<<6 + hLRND = 1<<7 | 1<<6 | 1<<5 +) + +// errHeaderByte indicates an unsupported value for the chunk header +// byte. These bytes starts the variable-length chunk header. +var errHeaderByte = errors.New("lzma: unsupported chunk header byte") + +// headerChunkType converts the header byte into a chunk type. It +// ignores the uncompressed size bits in the chunk header byte. +func headerChunkType(h byte) (c chunkType, err error) { + if h&hL == 0 { + // no compression + switch h { + case hEOS: + c = cEOS + case hUD: + c = cUD + case hU: + c = cU + default: + return 0, errHeaderByte + } + return + } + switch h & hLRND { + case hL: + c = cL + case hLR: + c = cLR + case hLRN: + c = cLRN + case hLRND: + c = cLRND + default: + return 0, errHeaderByte + } + return +} + +// uncompressedHeaderLen provides the length of an uncompressed header +const uncompressedHeaderLen = 3 + +// headerLen returns the length of the LZMA2 header for a given chunk +// type. +func headerLen(c chunkType) int { + switch c { + case cEOS: + return 1 + case cU, cUD: + return uncompressedHeaderLen + case cL, cLR: + return 5 + case cLRN, cLRND: + return 6 + } + panic(fmt.Errorf("unsupported chunk type %d", c)) +} + +// chunkHeader represents the contents of a chunk header. +type chunkHeader struct { + ctype chunkType + uncompressed uint32 + compressed uint16 + props Properties +} + +// String returns a string representation of the chunk header. +func (h *chunkHeader) String() string { + return fmt.Sprintf("%s %d %d %s", h.ctype, h.uncompressed, + h.compressed, &h.props) +} + +// UnmarshalBinary reads the content of the chunk header from the data +// slice. The slice must have the correct length. +func (h *chunkHeader) UnmarshalBinary(data []byte) error { + if len(data) == 0 { + return errors.New("no data") + } + c, err := headerChunkType(data[0]) + if err != nil { + return err + } + + n := headerLen(c) + if len(data) < n { + return errors.New("incomplete data") + } + if len(data) > n { + return errors.New("invalid data length") + } + + *h = chunkHeader{ctype: c} + if c == cEOS { + return nil + } + + h.uncompressed = uint32(uint16BE(data[1:3])) + if c <= cU { + return nil + } + h.uncompressed |= uint32(data[0]&^hLRND) << 16 + + h.compressed = uint16BE(data[3:5]) + if c <= cLR { + return nil + } + + h.props, err = PropertiesForCode(data[5]) + return err +} + +// MarshalBinary encodes the chunk header value. The function checks +// whether the content of the chunk header is correct. +func (h *chunkHeader) MarshalBinary() (data []byte, err error) { + if h.ctype > cLRND { + return nil, errors.New("invalid chunk type") + } + if err = h.props.verify(); err != nil { + return nil, err + } + + data = make([]byte, headerLen(h.ctype)) + + switch h.ctype { + case cEOS: + return data, nil + case cUD: + data[0] = hUD + case cU: + data[0] = hU + case cL: + data[0] = hL + case cLR: + data[0] = hLR + case cLRN: + data[0] = hLRN + case cLRND: + data[0] = hLRND + } + + putUint16BE(data[1:3], uint16(h.uncompressed)) + if h.ctype <= cU { + return data, nil + } + data[0] |= byte(h.uncompressed>>16) &^ hLRND + + putUint16BE(data[3:5], h.compressed) + if h.ctype <= cLR { + return data, nil + } + + data[5] = h.props.Code() + return data, nil +} + +// readChunkHeader reads the chunk header from the IO reader. +func readChunkHeader(r io.Reader) (h *chunkHeader, err error) { + p := make([]byte, 1, 6) + if _, err = io.ReadFull(r, p); err != nil { + return + } + c, err := headerChunkType(p[0]) + if err != nil { + return + } + p = p[:headerLen(c)] + if _, err = io.ReadFull(r, p[1:]); err != nil { + return + } + h = new(chunkHeader) + if err = h.UnmarshalBinary(p); err != nil { + return nil, err + } + return h, nil +} + +// uint16BE converts a big-endian uint16 representation to an uint16 +// value. +func uint16BE(p []byte) uint16 { + return uint16(p[0])<<8 | uint16(p[1]) +} + +// putUint16BE puts the big-endian uint16 presentation into the given +// slice. +func putUint16BE(p []byte, x uint16) { + p[0] = byte(x >> 8) + p[1] = byte(x) +} + +// chunkState is used to manage the state of the chunks +type chunkState byte + +// start and stop define the initial and terminating state of the chunk +// state +const ( + start chunkState = 'S' + stop = 'T' +) + +// errors for the chunk state handling +var ( + errChunkType = errors.New("lzma: unexpected chunk type") + errState = errors.New("lzma: wrong chunk state") +) + +// next transitions state based on chunk type input +func (c *chunkState) next(ctype chunkType) error { + switch *c { + // start state + case 'S': + switch ctype { + case cEOS: + *c = 'T' + case cUD: + *c = 'R' + case cLRND: + *c = 'L' + default: + return errChunkType + } + // normal LZMA mode + case 'L': + switch ctype { + case cEOS: + *c = 'T' + case cUD: + *c = 'R' + case cU: + *c = 'U' + case cL, cLR, cLRN, cLRND: + break + default: + return errChunkType + } + // reset required + case 'R': + switch ctype { + case cEOS: + *c = 'T' + case cUD, cU: + break + case cLRN, cLRND: + *c = 'L' + default: + return errChunkType + } + // uncompressed + case 'U': + switch ctype { + case cEOS: + *c = 'T' + case cUD: + *c = 'R' + case cU: + break + case cL, cLR, cLRN, cLRND: + *c = 'L' + default: + return errChunkType + } + // terminal state + case 'T': + return errChunkType + default: + return errState + } + return nil +} + +// defaultChunkType returns the default chunk type for each chunk state. +func (c chunkState) defaultChunkType() chunkType { + switch c { + case 'S': + return cLRND + case 'L', 'U': + return cL + case 'R': + return cLRN + default: + // no error + return cEOS + } +} + +// maxDictCap defines the maximum dictionary capacity supported by the +// LZMA2 dictionary capacity encoding. +const maxDictCap = 1<<32 - 1 + +// maxDictCapCode defines the maximum dictionary capacity code. +const maxDictCapCode = 40 + +// The function decodes the dictionary capacity byte, but doesn't change +// for the correct range of the given byte. +func decodeDictCap(c byte) int64 { + return (2 | int64(c)&1) << (11 + (c>>1)&0x1f) +} + +// DecodeDictCap decodes the encoded dictionary capacity. The function +// returns an error if the code is out of range. +func DecodeDictCap(c byte) (n int64, err error) { + if c >= maxDictCapCode { + if c == maxDictCapCode { + return maxDictCap, nil + } + return 0, errors.New("lzma: invalid dictionary size code") + } + return decodeDictCap(c), nil +} + +// EncodeDictCap encodes a dictionary capacity. The function returns the +// code for the capacity that is greater or equal n. If n exceeds the +// maximum support dictionary capacity, the maximum value is returned. +func EncodeDictCap(n int64) byte { + a, b := byte(0), byte(40) + for a < b { + c := a + (b-a)>>1 + m := decodeDictCap(c) + if n <= m { + if n == m { + return c + } + b = c + } else { + a = c + 1 + } + } + return a +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/lengthcodec.go b/vendor/github.com/ulikunitz/xz/lzma/lengthcodec.go new file mode 100644 index 000000000..927395bd8 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/lengthcodec.go @@ -0,0 +1,129 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import "errors" + +// maxPosBits defines the number of bits of the position value that are used to +// to compute the posState value. The value is used to select the tree codec +// for length encoding and decoding. +const maxPosBits = 4 + +// minMatchLen and maxMatchLen give the minimum and maximum values for +// encoding and decoding length values. minMatchLen is also used as base +// for the encoded length values. +const ( + minMatchLen = 2 + maxMatchLen = minMatchLen + 16 + 256 - 1 +) + +// lengthCodec support the encoding of the length value. +type lengthCodec struct { + choice [2]prob + low [1 << maxPosBits]treeCodec + mid [1 << maxPosBits]treeCodec + high treeCodec +} + +// deepcopy initializes the lc value as deep copy of the source value. +func (lc *lengthCodec) deepcopy(src *lengthCodec) { + if lc == src { + return + } + lc.choice = src.choice + for i := range lc.low { + lc.low[i].deepcopy(&src.low[i]) + } + for i := range lc.mid { + lc.mid[i].deepcopy(&src.mid[i]) + } + lc.high.deepcopy(&src.high) +} + +// init initializes a new length codec. +func (lc *lengthCodec) init() { + for i := range lc.choice { + lc.choice[i] = probInit + } + for i := range lc.low { + lc.low[i] = makeTreeCodec(3) + } + for i := range lc.mid { + lc.mid[i] = makeTreeCodec(3) + } + lc.high = makeTreeCodec(8) +} + +// lBits gives the number of bits used for the encoding of the l value +// provided to the range encoder. +func lBits(l uint32) int { + switch { + case l < 8: + return 4 + case l < 16: + return 5 + default: + return 10 + } +} + +// Encode encodes the length offset. The length offset l can be compute by +// subtracting minMatchLen (2) from the actual length. +// +// l = length - minMatchLen +// +func (lc *lengthCodec) Encode(e *rangeEncoder, l uint32, posState uint32, +) (err error) { + if l > maxMatchLen-minMatchLen { + return errors.New("lengthCodec.Encode: l out of range") + } + if l < 8 { + if err = lc.choice[0].Encode(e, 0); err != nil { + return + } + return lc.low[posState].Encode(e, l) + } + if err = lc.choice[0].Encode(e, 1); err != nil { + return + } + if l < 16 { + if err = lc.choice[1].Encode(e, 0); err != nil { + return + } + return lc.mid[posState].Encode(e, l-8) + } + if err = lc.choice[1].Encode(e, 1); err != nil { + return + } + if err = lc.high.Encode(e, l-16); err != nil { + return + } + return nil +} + +// Decode reads the length offset. Add minMatchLen to compute the actual length +// to the length offset l. +func (lc *lengthCodec) Decode(d *rangeDecoder, posState uint32, +) (l uint32, err error) { + var b uint32 + if b, err = lc.choice[0].Decode(d); err != nil { + return + } + if b == 0 { + l, err = lc.low[posState].Decode(d) + return + } + if b, err = lc.choice[1].Decode(d); err != nil { + return + } + if b == 0 { + l, err = lc.mid[posState].Decode(d) + l += 8 + return + } + l, err = lc.high.Decode(d) + l += 16 + return +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/literalcodec.go b/vendor/github.com/ulikunitz/xz/lzma/literalcodec.go new file mode 100644 index 000000000..ca31530fd --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/literalcodec.go @@ -0,0 +1,132 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +// literalCodec supports the encoding of literal. It provides 768 probability +// values per literal state. The upper 512 probabilities are used with the +// context of a match bit. +type literalCodec struct { + probs []prob +} + +// deepcopy initializes literal codec c as a deep copy of the source. +func (c *literalCodec) deepcopy(src *literalCodec) { + if c == src { + return + } + c.probs = make([]prob, len(src.probs)) + copy(c.probs, src.probs) +} + +// init initializes the literal codec. +func (c *literalCodec) init(lc, lp int) { + switch { + case !(minLC <= lc && lc <= maxLC): + panic("lc out of range") + case !(minLP <= lp && lp <= maxLP): + panic("lp out of range") + } + c.probs = make([]prob, 0x300<= 7 { + m := uint32(match) + for { + matchBit := (m >> 7) & 1 + m <<= 1 + bit := (r >> 7) & 1 + r <<= 1 + i := ((1 + matchBit) << 8) | symbol + if err = probs[i].Encode(e, bit); err != nil { + return + } + symbol = (symbol << 1) | bit + if matchBit != bit { + break + } + if symbol >= 0x100 { + break + } + } + } + for symbol < 0x100 { + bit := (r >> 7) & 1 + r <<= 1 + if err = probs[symbol].Encode(e, bit); err != nil { + return + } + symbol = (symbol << 1) | bit + } + return nil +} + +// Decode decodes a literal byte using the range decoder as well as the LZMA +// state, a match byte, and the literal state. +func (c *literalCodec) Decode(d *rangeDecoder, + state uint32, match byte, litState uint32, +) (s byte, err error) { + k := litState * 0x300 + probs := c.probs[k : k+0x300] + symbol := uint32(1) + if state >= 7 { + m := uint32(match) + for { + matchBit := (m >> 7) & 1 + m <<= 1 + i := ((1 + matchBit) << 8) | symbol + bit, err := d.DecodeBit(&probs[i]) + if err != nil { + return 0, err + } + symbol = (symbol << 1) | bit + if matchBit != bit { + break + } + if symbol >= 0x100 { + break + } + } + } + for symbol < 0x100 { + bit, err := d.DecodeBit(&probs[symbol]) + if err != nil { + return 0, err + } + symbol = (symbol << 1) | bit + } + s = byte(symbol - 0x100) + return s, nil +} + +// minLC and maxLC define the range for LC values. +const ( + minLC = 0 + maxLC = 8 +) + +// minLC and maxLC define the range for LP values. +const ( + minLP = 0 + maxLP = 4 +) + +// minState and maxState define a range for the state values stored in +// the State values. +const ( + minState = 0 + maxState = 11 +) diff --git a/vendor/github.com/ulikunitz/xz/lzma/matchalgorithm.go b/vendor/github.com/ulikunitz/xz/lzma/matchalgorithm.go new file mode 100644 index 000000000..7d03ec0dc --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/matchalgorithm.go @@ -0,0 +1,52 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import "errors" + +// MatchAlgorithm identifies an algorithm to find matches in the +// dictionary. +type MatchAlgorithm byte + +// Supported matcher algorithms. +const ( + HashTable4 MatchAlgorithm = iota + BinaryTree +) + +// maStrings are used by the String method. +var maStrings = map[MatchAlgorithm]string{ + HashTable4: "HashTable4", + BinaryTree: "BinaryTree", +} + +// String returns a string representation of the Matcher. +func (a MatchAlgorithm) String() string { + if s, ok := maStrings[a]; ok { + return s + } + return "unknown" +} + +var errUnsupportedMatchAlgorithm = errors.New( + "lzma: unsupported match algorithm value") + +// verify checks whether the matcher value is supported. +func (a MatchAlgorithm) verify() error { + if _, ok := maStrings[a]; !ok { + return errUnsupportedMatchAlgorithm + } + return nil +} + +func (a MatchAlgorithm) new(dictCap int) (m matcher, err error) { + switch a { + case HashTable4: + return newHashTable(dictCap, 4) + case BinaryTree: + return newBinTree(dictCap) + } + return nil, errUnsupportedMatchAlgorithm +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/operation.go b/vendor/github.com/ulikunitz/xz/lzma/operation.go new file mode 100644 index 000000000..a75c9b46c --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/operation.go @@ -0,0 +1,80 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" + "unicode" +) + +// operation represents an operation on the dictionary during encoding or +// decoding. +type operation interface { + Len() int +} + +// rep represents a repetition at the given distance and the given length +type match struct { + // supports all possible distance values, including the eos marker + distance int64 + // length + n int +} + +// verify checks whether the match is valid. If that is not the case an +// error is returned. +func (m match) verify() error { + if !(minDistance <= m.distance && m.distance <= maxDistance) { + return errors.New("distance out of range") + } + if !(1 <= m.n && m.n <= maxMatchLen) { + return errors.New("length out of range") + } + return nil +} + +// l return the l-value for the match, which is the difference of length +// n and 2. +func (m match) l() uint32 { + return uint32(m.n - minMatchLen) +} + +// dist returns the dist value for the match, which is one less of the +// distance stored in the match. +func (m match) dist() uint32 { + return uint32(m.distance - minDistance) +} + +// Len returns the number of bytes matched. +func (m match) Len() int { + return m.n +} + +// String returns a string representation for the repetition. +func (m match) String() string { + return fmt.Sprintf("M{%d,%d}", m.distance, m.n) +} + +// lit represents a single byte literal. +type lit struct { + b byte +} + +// Len returns 1 for the single byte literal. +func (l lit) Len() int { + return 1 +} + +// String returns a string representation for the literal. +func (l lit) String() string { + var c byte + if unicode.IsPrint(rune(l.b)) { + c = l.b + } else { + c = '.' + } + return fmt.Sprintf("L{%c/%02x}", c, l.b) +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/prob.go b/vendor/github.com/ulikunitz/xz/lzma/prob.go new file mode 100644 index 000000000..6987a166f --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/prob.go @@ -0,0 +1,53 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +// movebits defines the number of bits used for the updates of probability +// values. +const movebits = 5 + +// probbits defines the number of bits of a probability value. +const probbits = 11 + +// probInit defines 0.5 as initial value for prob values. +const probInit prob = 1 << (probbits - 1) + +// Type prob represents probabilities. The type can also be used to encode and +// decode single bits. +type prob uint16 + +// Dec decreases the probability. The decrease is proportional to the +// probability value. +func (p *prob) dec() { + *p -= *p >> movebits +} + +// Inc increases the probability. The Increase is proportional to the +// difference of 1 and the probability value. +func (p *prob) inc() { + *p += ((1 << probbits) - *p) >> movebits +} + +// Computes the new bound for a given range using the probability value. +func (p prob) bound(r uint32) uint32 { + return (r >> probbits) * uint32(p) +} + +// Bits returns 1. One is the number of bits that can be encoded or decoded +// with a single prob value. +func (p prob) Bits() int { + return 1 +} + +// Encode encodes the least-significant bit of v. Note that the p value will be +// changed. +func (p *prob) Encode(e *rangeEncoder, v uint32) error { + return e.EncodeBit(v, p) +} + +// Decode decodes a single bit. Note that the p value will change. +func (p *prob) Decode(d *rangeDecoder) (v uint32, err error) { + return d.DecodeBit(p) +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/properties.go b/vendor/github.com/ulikunitz/xz/lzma/properties.go new file mode 100644 index 000000000..662feba87 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/properties.go @@ -0,0 +1,69 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" +) + +// maximum and minimum values for the LZMA properties. +const ( + minPB = 0 + maxPB = 4 +) + +// maxPropertyCode is the possible maximum of a properties code byte. +const maxPropertyCode = (maxPB+1)*(maxLP+1)*(maxLC+1) - 1 + +// Properties contains the parameters LC, LP and PB. The parameter LC +// defines the number of literal context bits; parameter LP the number +// of literal position bits and PB the number of position bits. +type Properties struct { + LC int + LP int + PB int +} + +// String returns the properties in a string representation. +func (p *Properties) String() string { + return fmt.Sprintf("LC %d LP %d PB %d", p.LC, p.LP, p.PB) +} + +// PropertiesForCode converts a properties code byte into a Properties value. +func PropertiesForCode(code byte) (p Properties, err error) { + if code > maxPropertyCode { + return p, errors.New("lzma: invalid properties code") + } + p.LC = int(code % 9) + code /= 9 + p.LP = int(code % 5) + code /= 5 + p.PB = int(code % 5) + return p, err +} + +// verify checks the properties for correctness. +func (p *Properties) verify() error { + if p == nil { + return errors.New("lzma: properties are nil") + } + if !(minLC <= p.LC && p.LC <= maxLC) { + return errors.New("lzma: lc out of range") + } + if !(minLP <= p.LP && p.LP <= maxLP) { + return errors.New("lzma: lp out of range") + } + if !(minPB <= p.PB && p.PB <= maxPB) { + return errors.New("lzma: pb out of range") + } + return nil +} + +// Code converts the properties to a byte. The function assumes that +// the properties components are all in range. +func (p Properties) Code() byte { + return byte((p.PB*5+p.LP)*9 + p.LC) +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/rangecodec.go b/vendor/github.com/ulikunitz/xz/lzma/rangecodec.go new file mode 100644 index 000000000..7189a0377 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/rangecodec.go @@ -0,0 +1,248 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "io" +) + +// rangeEncoder implements range encoding of single bits. The low value can +// overflow therefore we need uint64. The cache value is used to handle +// overflows. +type rangeEncoder struct { + lbw *LimitedByteWriter + nrange uint32 + low uint64 + cacheLen int64 + cache byte +} + +// maxInt64 provides the maximal value of the int64 type +const maxInt64 = 1<<63 - 1 + +// newRangeEncoder creates a new range encoder. +func newRangeEncoder(bw io.ByteWriter) (re *rangeEncoder, err error) { + lbw, ok := bw.(*LimitedByteWriter) + if !ok { + lbw = &LimitedByteWriter{BW: bw, N: maxInt64} + } + return &rangeEncoder{ + lbw: lbw, + nrange: 0xffffffff, + cacheLen: 1}, nil +} + +// Available returns the number of bytes that still can be written. The +// method takes the bytes that will be currently written by Close into +// account. +func (e *rangeEncoder) Available() int64 { + return e.lbw.N - (e.cacheLen + 4) +} + +// writeByte writes a single byte to the underlying writer. An error is +// returned if the limit is reached. The written byte will be counted if +// the underlying writer doesn't return an error. +func (e *rangeEncoder) writeByte(c byte) error { + if e.Available() < 1 { + return ErrLimit + } + return e.lbw.WriteByte(c) +} + +// DirectEncodeBit encodes the least-significant bit of b with probability 1/2. +func (e *rangeEncoder) DirectEncodeBit(b uint32) error { + e.nrange >>= 1 + e.low += uint64(e.nrange) & (0 - (uint64(b) & 1)) + + // normalize + const top = 1 << 24 + if e.nrange >= top { + return nil + } + e.nrange <<= 8 + return e.shiftLow() +} + +// EncodeBit encodes the least significant bit of b. The p value will be +// updated by the function depending on the bit encoded. +func (e *rangeEncoder) EncodeBit(b uint32, p *prob) error { + bound := p.bound(e.nrange) + if b&1 == 0 { + e.nrange = bound + p.inc() + } else { + e.low += uint64(bound) + e.nrange -= bound + p.dec() + } + + // normalize + const top = 1 << 24 + if e.nrange >= top { + return nil + } + e.nrange <<= 8 + return e.shiftLow() +} + +// Close writes a complete copy of the low value. +func (e *rangeEncoder) Close() error { + for i := 0; i < 5; i++ { + if err := e.shiftLow(); err != nil { + return err + } + } + return nil +} + +// shiftLow shifts the low value for 8 bit. The shifted byte is written into +// the byte writer. The cache value is used to handle overflows. +func (e *rangeEncoder) shiftLow() error { + if uint32(e.low) < 0xff000000 || (e.low>>32) != 0 { + tmp := e.cache + for { + err := e.writeByte(tmp + byte(e.low>>32)) + if err != nil { + return err + } + tmp = 0xff + e.cacheLen-- + if e.cacheLen <= 0 { + if e.cacheLen < 0 { + panic("negative cacheLen") + } + break + } + } + e.cache = byte(uint32(e.low) >> 24) + } + e.cacheLen++ + e.low = uint64(uint32(e.low) << 8) + return nil +} + +// rangeDecoder decodes single bits of the range encoding stream. +type rangeDecoder struct { + br io.ByteReader + nrange uint32 + code uint32 +} + +// init initializes the range decoder, by reading from the byte reader. +func (d *rangeDecoder) init() error { + d.nrange = 0xffffffff + d.code = 0 + + b, err := d.br.ReadByte() + if err != nil { + return err + } + if b != 0 { + return errors.New("newRangeDecoder: first byte not zero") + } + + for i := 0; i < 4; i++ { + if err = d.updateCode(); err != nil { + return err + } + } + + if d.code >= d.nrange { + return errors.New("newRangeDecoder: d.code >= d.nrange") + } + + return nil +} + +// newRangeDecoder initializes a range decoder. It reads five bytes from the +// reader and therefore may return an error. +func newRangeDecoder(br io.ByteReader) (d *rangeDecoder, err error) { + d = &rangeDecoder{br: br, nrange: 0xffffffff} + + b, err := d.br.ReadByte() + if err != nil { + return nil, err + } + if b != 0 { + return nil, errors.New("newRangeDecoder: first byte not zero") + } + + for i := 0; i < 4; i++ { + if err = d.updateCode(); err != nil { + return nil, err + } + } + + if d.code >= d.nrange { + return nil, errors.New("newRangeDecoder: d.code >= d.nrange") + } + + return d, nil +} + +// possiblyAtEnd checks whether the decoder may be at the end of the stream. +func (d *rangeDecoder) possiblyAtEnd() bool { + return d.code == 0 +} + +// DirectDecodeBit decodes a bit with probability 1/2. The return value b will +// contain the bit at the least-significant position. All other bits will be +// zero. +func (d *rangeDecoder) DirectDecodeBit() (b uint32, err error) { + d.nrange >>= 1 + d.code -= d.nrange + t := 0 - (d.code >> 31) + d.code += d.nrange & t + b = (t + 1) & 1 + + // d.code will stay less then d.nrange + + // normalize + // assume d.code < d.nrange + const top = 1 << 24 + if d.nrange >= top { + return b, nil + } + d.nrange <<= 8 + // d.code < d.nrange will be maintained + return b, d.updateCode() +} + +// decodeBit decodes a single bit. The bit will be returned at the +// least-significant position. All other bits will be zero. The probability +// value will be updated. +func (d *rangeDecoder) DecodeBit(p *prob) (b uint32, err error) { + bound := p.bound(d.nrange) + if d.code < bound { + d.nrange = bound + p.inc() + b = 0 + } else { + d.code -= bound + d.nrange -= bound + p.dec() + b = 1 + } + // normalize + // assume d.code < d.nrange + const top = 1 << 24 + if d.nrange >= top { + return b, nil + } + d.nrange <<= 8 + // d.code < d.nrange will be maintained + return b, d.updateCode() +} + +// updateCode reads a new byte into the code. +func (d *rangeDecoder) updateCode() error { + b, err := d.br.ReadByte() + if err != nil { + return err + } + d.code = (d.code << 8) | uint32(b) + return nil +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/reader.go b/vendor/github.com/ulikunitz/xz/lzma/reader.go new file mode 100644 index 000000000..7b7eef31f --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/reader.go @@ -0,0 +1,100 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package lzma supports the decoding and encoding of LZMA streams. +// Reader and Writer support the classic LZMA format. Reader2 and +// Writer2 support the decoding and encoding of LZMA2 streams. +// +// The package is written completely in Go and doesn't rely on any external +// library. +package lzma + +import ( + "errors" + "io" +) + +// ReaderConfig stores the parameters for the reader of the classic LZMA +// format. +type ReaderConfig struct { + DictCap int +} + +// fill converts the zero values of the configuration to the default values. +func (c *ReaderConfig) fill() { + if c.DictCap == 0 { + c.DictCap = 8 * 1024 * 1024 + } +} + +// Verify checks the reader configuration for errors. Zero values will +// be replaced by default values. +func (c *ReaderConfig) Verify() error { + c.fill() + if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) { + return errors.New("lzma: dictionary capacity is out of range") + } + return nil +} + +// Reader provides a reader for LZMA files or streams. +type Reader struct { + lzma io.Reader + h header + d *decoder +} + +// NewReader creates a new reader for an LZMA stream using the classic +// format. NewReader reads and checks the header of the LZMA stream. +func NewReader(lzma io.Reader) (r *Reader, err error) { + return ReaderConfig{}.NewReader(lzma) +} + +// NewReader creates a new reader for an LZMA stream in the classic +// format. The function reads and verifies the the header of the LZMA +// stream. +func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) { + if err = c.Verify(); err != nil { + return nil, err + } + data := make([]byte, HeaderLen) + if _, err := io.ReadFull(lzma, data); err != nil { + if err == io.EOF { + return nil, errors.New("lzma: unexpected EOF") + } + return nil, err + } + r = &Reader{lzma: lzma} + if err = r.h.unmarshalBinary(data); err != nil { + return nil, err + } + if r.h.dictCap < MinDictCap { + return nil, errors.New("lzma: dictionary capacity too small") + } + dictCap := r.h.dictCap + if c.DictCap > dictCap { + dictCap = c.DictCap + } + + state := newState(r.h.properties) + dict, err := newDecoderDict(dictCap) + if err != nil { + return nil, err + } + r.d, err = newDecoder(ByteReader(lzma), state, dict, r.h.size) + if err != nil { + return nil, err + } + return r, nil +} + +// EOSMarker indicates that an EOS marker has been encountered. +func (r *Reader) EOSMarker() bool { + return r.d.eosMarker +} + +// Read returns uncompressed data. +func (r *Reader) Read(p []byte) (n int, err error) { + return r.d.Read(p) +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/reader2.go b/vendor/github.com/ulikunitz/xz/lzma/reader2.go new file mode 100644 index 000000000..33074e624 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/reader2.go @@ -0,0 +1,232 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "io" + + "github.com/ulikunitz/xz/internal/xlog" +) + +// Reader2Config stores the parameters for the LZMA2 reader. +// format. +type Reader2Config struct { + DictCap int +} + +// fill converts the zero values of the configuration to the default values. +func (c *Reader2Config) fill() { + if c.DictCap == 0 { + c.DictCap = 8 * 1024 * 1024 + } +} + +// Verify checks the reader configuration for errors. Zero configuration values +// will be replaced by default values. +func (c *Reader2Config) Verify() error { + c.fill() + if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) { + return errors.New("lzma: dictionary capacity is out of range") + } + return nil +} + +// Reader2 supports the reading of LZMA2 chunk sequences. Note that the +// first chunk should have a dictionary reset and the first compressed +// chunk a properties reset. The chunk sequence may not be terminated by +// an end-of-stream chunk. +type Reader2 struct { + r io.Reader + err error + + dict *decoderDict + ur *uncompressedReader + decoder *decoder + chunkReader io.Reader + + cstate chunkState + ctype chunkType +} + +// NewReader2 creates a reader for an LZMA2 chunk sequence. +func NewReader2(lzma2 io.Reader) (r *Reader2, err error) { + return Reader2Config{}.NewReader2(lzma2) +} + +// NewReader2 creates an LZMA2 reader using the given configuration. +func (c Reader2Config) NewReader2(lzma2 io.Reader) (r *Reader2, err error) { + if err = c.Verify(); err != nil { + return nil, err + } + r = &Reader2{r: lzma2, cstate: start} + r.dict, err = newDecoderDict(c.DictCap) + if err != nil { + return nil, err + } + if err = r.startChunk(); err != nil { + r.err = err + } + return r, nil +} + +// uncompressed tests whether the chunk type specifies an uncompressed +// chunk. +func uncompressed(ctype chunkType) bool { + return ctype == cU || ctype == cUD +} + +// startChunk parses a new chunk. +func (r *Reader2) startChunk() error { + r.chunkReader = nil + header, err := readChunkHeader(r.r) + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return err + } + xlog.Debugf("chunk header %v", header) + if err = r.cstate.next(header.ctype); err != nil { + return err + } + if r.cstate == stop { + return io.EOF + } + if header.ctype == cUD || header.ctype == cLRND { + r.dict.Reset() + } + size := int64(header.uncompressed) + 1 + if uncompressed(header.ctype) { + if r.ur != nil { + r.ur.Reopen(r.r, size) + } else { + r.ur = newUncompressedReader(r.r, r.dict, size) + } + r.chunkReader = r.ur + return nil + } + br := ByteReader(io.LimitReader(r.r, int64(header.compressed)+1)) + if r.decoder == nil { + state := newState(header.props) + r.decoder, err = newDecoder(br, state, r.dict, size) + if err != nil { + return err + } + r.chunkReader = r.decoder + return nil + } + switch header.ctype { + case cLR: + r.decoder.State.Reset() + case cLRN, cLRND: + r.decoder.State = newState(header.props) + } + err = r.decoder.Reopen(br, size) + if err != nil { + return err + } + r.chunkReader = r.decoder + return nil +} + +// Read reads data from the LZMA2 chunk sequence. +func (r *Reader2) Read(p []byte) (n int, err error) { + if r.err != nil { + return 0, r.err + } + for n < len(p) { + var k int + k, err = r.chunkReader.Read(p[n:]) + n += k + if err != nil { + if err == io.EOF { + err = r.startChunk() + if err == nil { + continue + } + } + r.err = err + return n, err + } + if k == 0 { + r.err = errors.New("lzma: Reader2 doesn't get data") + return n, r.err + } + } + return n, nil +} + +// EOS returns whether the LZMA2 stream has been terminated by an +// end-of-stream chunk. +func (r *Reader2) EOS() bool { + return r.cstate == stop +} + +// uncompressedReader is used to read uncompressed chunks. +type uncompressedReader struct { + lr io.LimitedReader + Dict *decoderDict + eof bool + err error +} + +// newUncompressedReader initializes a new uncompressedReader. +func newUncompressedReader(r io.Reader, dict *decoderDict, size int64) *uncompressedReader { + ur := &uncompressedReader{ + lr: io.LimitedReader{R: r, N: size}, + Dict: dict, + } + return ur +} + +// Reopen reinitializes an uncompressed reader. +func (ur *uncompressedReader) Reopen(r io.Reader, size int64) { + ur.err = nil + ur.eof = false + ur.lr = io.LimitedReader{R: r, N: size} +} + +// fill reads uncompressed data into the dictionary. +func (ur *uncompressedReader) fill() error { + if !ur.eof { + n, err := io.CopyN(ur.Dict, &ur.lr, int64(ur.Dict.Available())) + if err != io.EOF { + return err + } + ur.eof = true + if n > 0 { + return nil + } + } + if ur.lr.N != 0 { + return io.ErrUnexpectedEOF + } + return io.EOF +} + +// Read reads uncompressed data from the limited reader. +func (ur *uncompressedReader) Read(p []byte) (n int, err error) { + if ur.err != nil { + return 0, ur.err + } + for { + var k int + k, err = ur.Dict.Read(p[n:]) + n += k + if n >= len(p) { + return n, nil + } + if err != nil { + break + } + err = ur.fill() + if err != nil { + break + } + } + ur.err = err + return n, err +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/state.go b/vendor/github.com/ulikunitz/xz/lzma/state.go new file mode 100644 index 000000000..03f061cf1 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/state.go @@ -0,0 +1,151 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +// states defines the overall state count +const states = 12 + +// State maintains the full state of the operation encoding or decoding +// process. +type state struct { + rep [4]uint32 + isMatch [states << maxPosBits]prob + isRepG0Long [states << maxPosBits]prob + isRep [states]prob + isRepG0 [states]prob + isRepG1 [states]prob + isRepG2 [states]prob + litCodec literalCodec + lenCodec lengthCodec + repLenCodec lengthCodec + distCodec distCodec + state uint32 + posBitMask uint32 + Properties Properties +} + +// initProbSlice initializes a slice of probabilities. +func initProbSlice(p []prob) { + for i := range p { + p[i] = probInit + } +} + +// Reset sets all state information to the original values. +func (s *state) Reset() { + p := s.Properties + *s = state{ + Properties: p, + // dict: s.dict, + posBitMask: (uint32(1) << uint(p.PB)) - 1, + } + initProbSlice(s.isMatch[:]) + initProbSlice(s.isRep[:]) + initProbSlice(s.isRepG0[:]) + initProbSlice(s.isRepG1[:]) + initProbSlice(s.isRepG2[:]) + initProbSlice(s.isRepG0Long[:]) + s.litCodec.init(p.LC, p.LP) + s.lenCodec.init() + s.repLenCodec.init() + s.distCodec.init() +} + +// initState initializes the state. +func initState(s *state, p Properties) { + *s = state{Properties: p} + s.Reset() +} + +// newState creates a new state from the give Properties. +func newState(p Properties) *state { + s := &state{Properties: p} + s.Reset() + return s +} + +// deepcopy initializes s as a deep copy of the source. +func (s *state) deepcopy(src *state) { + if s == src { + return + } + s.rep = src.rep + s.isMatch = src.isMatch + s.isRepG0Long = src.isRepG0Long + s.isRep = src.isRep + s.isRepG0 = src.isRepG0 + s.isRepG1 = src.isRepG1 + s.isRepG2 = src.isRepG2 + s.litCodec.deepcopy(&src.litCodec) + s.lenCodec.deepcopy(&src.lenCodec) + s.repLenCodec.deepcopy(&src.repLenCodec) + s.distCodec.deepcopy(&src.distCodec) + s.state = src.state + s.posBitMask = src.posBitMask + s.Properties = src.Properties +} + +// cloneState creates a new clone of the give state. +func cloneState(src *state) *state { + s := new(state) + s.deepcopy(src) + return s +} + +// updateStateLiteral updates the state for a literal. +func (s *state) updateStateLiteral() { + switch { + case s.state < 4: + s.state = 0 + return + case s.state < 10: + s.state -= 3 + return + } + s.state -= 6 +} + +// updateStateMatch updates the state for a match. +func (s *state) updateStateMatch() { + if s.state < 7 { + s.state = 7 + } else { + s.state = 10 + } +} + +// updateStateRep updates the state for a repetition. +func (s *state) updateStateRep() { + if s.state < 7 { + s.state = 8 + } else { + s.state = 11 + } +} + +// updateStateShortRep updates the state for a short repetition. +func (s *state) updateStateShortRep() { + if s.state < 7 { + s.state = 9 + } else { + s.state = 11 + } +} + +// states computes the states of the operation codec. +func (s *state) states(dictHead int64) (state1, state2, posState uint32) { + state1 = s.state + posState = uint32(dictHead) & s.posBitMask + state2 = (s.state << maxPosBits) | posState + return +} + +// litState computes the literal state. +func (s *state) litState(prev byte, dictHead int64) uint32 { + lp, lc := uint(s.Properties.LP), uint(s.Properties.LC) + litState := ((uint32(dictHead) & ((1 << lp) - 1)) << lc) | + (uint32(prev) >> (8 - lc)) + return litState +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/treecodecs.go b/vendor/github.com/ulikunitz/xz/lzma/treecodecs.go new file mode 100644 index 000000000..1cb3596fe --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/treecodecs.go @@ -0,0 +1,133 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +// treeCodec encodes or decodes values with a fixed bit size. It is using a +// tree of probability value. The root of the tree is the most-significant bit. +type treeCodec struct { + probTree +} + +// makeTreeCodec makes a tree codec. The bits value must be inside the range +// [1,32]. +func makeTreeCodec(bits int) treeCodec { + return treeCodec{makeProbTree(bits)} +} + +// deepcopy initializes tc as a deep copy of the source. +func (tc *treeCodec) deepcopy(src *treeCodec) { + tc.probTree.deepcopy(&src.probTree) +} + +// Encode uses the range encoder to encode a fixed-bit-size value. +func (tc *treeCodec) Encode(e *rangeEncoder, v uint32) (err error) { + m := uint32(1) + for i := int(tc.bits) - 1; i >= 0; i-- { + b := (v >> uint(i)) & 1 + if err := e.EncodeBit(b, &tc.probs[m]); err != nil { + return err + } + m = (m << 1) | b + } + return nil +} + +// Decodes uses the range decoder to decode a fixed-bit-size value. Errors may +// be caused by the range decoder. +func (tc *treeCodec) Decode(d *rangeDecoder) (v uint32, err error) { + m := uint32(1) + for j := 0; j < int(tc.bits); j++ { + b, err := d.DecodeBit(&tc.probs[m]) + if err != nil { + return 0, err + } + m = (m << 1) | b + } + return m - (1 << uint(tc.bits)), nil +} + +// treeReverseCodec is another tree codec, where the least-significant bit is +// the start of the probability tree. +type treeReverseCodec struct { + probTree +} + +// deepcopy initializes the treeReverseCodec as a deep copy of the +// source. +func (tc *treeReverseCodec) deepcopy(src *treeReverseCodec) { + tc.probTree.deepcopy(&src.probTree) +} + +// makeTreeReverseCodec creates treeReverseCodec value. The bits argument must +// be in the range [1,32]. +func makeTreeReverseCodec(bits int) treeReverseCodec { + return treeReverseCodec{makeProbTree(bits)} +} + +// Encode uses range encoder to encode a fixed-bit-size value. The range +// encoder may cause errors. +func (tc *treeReverseCodec) Encode(v uint32, e *rangeEncoder) (err error) { + m := uint32(1) + for i := uint(0); i < uint(tc.bits); i++ { + b := (v >> i) & 1 + if err := e.EncodeBit(b, &tc.probs[m]); err != nil { + return err + } + m = (m << 1) | b + } + return nil +} + +// Decodes uses the range decoder to decode a fixed-bit-size value. Errors +// returned by the range decoder will be returned. +func (tc *treeReverseCodec) Decode(d *rangeDecoder) (v uint32, err error) { + m := uint32(1) + for j := uint(0); j < uint(tc.bits); j++ { + b, err := d.DecodeBit(&tc.probs[m]) + if err != nil { + return 0, err + } + m = (m << 1) | b + v |= b << j + } + return v, nil +} + +// probTree stores enough probability values to be used by the treeEncode and +// treeDecode methods of the range coder types. +type probTree struct { + probs []prob + bits byte +} + +// deepcopy initializes the probTree value as a deep copy of the source. +func (t *probTree) deepcopy(src *probTree) { + if t == src { + return + } + t.probs = make([]prob, len(src.probs)) + copy(t.probs, src.probs) + t.bits = src.bits +} + +// makeProbTree initializes a probTree structure. +func makeProbTree(bits int) probTree { + if !(1 <= bits && bits <= 32) { + panic("bits outside of range [1,32]") + } + t := probTree{ + bits: byte(bits), + probs: make([]prob, 1< 0 { + c.SizeInHeader = true + } + if !c.SizeInHeader { + c.EOSMarker = true + } +} + +// Verify checks WriterConfig for errors. Verify will replace zero +// values with default values. +func (c *WriterConfig) Verify() error { + c.fill() + var err error + if c == nil { + return errors.New("lzma: WriterConfig is nil") + } + if c.Properties == nil { + return errors.New("lzma: WriterConfig has no Properties set") + } + if err = c.Properties.verify(); err != nil { + return err + } + if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) { + return errors.New("lzma: dictionary capacity is out of range") + } + if !(maxMatchLen <= c.BufSize) { + return errors.New("lzma: lookahead buffer size too small") + } + if c.SizeInHeader { + if c.Size < 0 { + return errors.New("lzma: negative size not supported") + } + } else if !c.EOSMarker { + return errors.New("lzma: EOS marker is required") + } + if err = c.Matcher.verify(); err != nil { + return err + } + + return nil +} + +// header returns the header structure for this configuration. +func (c *WriterConfig) header() header { + h := header{ + properties: *c.Properties, + dictCap: c.DictCap, + size: -1, + } + if c.SizeInHeader { + h.size = c.Size + } + return h +} + +// Writer writes an LZMA stream in the classic format. +type Writer struct { + h header + bw io.ByteWriter + buf *bufio.Writer + e *encoder +} + +// NewWriter creates a new LZMA writer for the classic format. The +// method will write the header to the underlying stream. +func (c WriterConfig) NewWriter(lzma io.Writer) (w *Writer, err error) { + if err = c.Verify(); err != nil { + return nil, err + } + w = &Writer{h: c.header()} + + var ok bool + w.bw, ok = lzma.(io.ByteWriter) + if !ok { + w.buf = bufio.NewWriter(lzma) + w.bw = w.buf + } + state := newState(w.h.properties) + m, err := c.Matcher.new(w.h.dictCap) + if err != nil { + return nil, err + } + dict, err := newEncoderDict(w.h.dictCap, c.BufSize, m) + if err != nil { + return nil, err + } + var flags encoderFlags + if c.EOSMarker { + flags = eosMarker + } + if w.e, err = newEncoder(w.bw, state, dict, flags); err != nil { + return nil, err + } + + if err = w.writeHeader(); err != nil { + return nil, err + } + return w, nil +} + +// NewWriter creates a new LZMA writer using the classic format. The +// function writes the header to the underlying stream. +func NewWriter(lzma io.Writer) (w *Writer, err error) { + return WriterConfig{}.NewWriter(lzma) +} + +// writeHeader writes the LZMA header into the stream. +func (w *Writer) writeHeader() error { + data, err := w.h.marshalBinary() + if err != nil { + return err + } + _, err = w.bw.(io.Writer).Write(data) + return err +} + +// Write puts data into the Writer. +func (w *Writer) Write(p []byte) (n int, err error) { + if w.h.size >= 0 { + m := w.h.size + m -= w.e.Compressed() + int64(w.e.dict.Buffered()) + if m < 0 { + m = 0 + } + if m < int64(len(p)) { + p = p[:m] + err = ErrNoSpace + } + } + var werr error + if n, werr = w.e.Write(p); werr != nil { + err = werr + } + return n, err +} + +// Close closes the writer stream. It ensures that all data from the +// buffer will be compressed and the LZMA stream will be finished. +func (w *Writer) Close() error { + if w.h.size >= 0 { + n := w.e.Compressed() + int64(w.e.dict.Buffered()) + if n != w.h.size { + return errSize + } + } + err := w.e.Close() + if w.buf != nil { + ferr := w.buf.Flush() + if err == nil { + err = ferr + } + } + return err +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/writer2.go b/vendor/github.com/ulikunitz/xz/lzma/writer2.go new file mode 100644 index 000000000..c263b0666 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/writer2.go @@ -0,0 +1,305 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "bytes" + "errors" + "io" +) + +// Writer2Config is used to create a Writer2 using parameters. +type Writer2Config struct { + // The properties for the encoding. If the it is nil the value + // {LC: 3, LP: 0, PB: 2} will be chosen. + Properties *Properties + // The capacity of the dictionary. If DictCap is zero, the value + // 8 MiB will be chosen. + DictCap int + // Size of the lookahead buffer; value 0 indicates default size + // 4096 + BufSize int + // Match algorithm + Matcher MatchAlgorithm +} + +// fill replaces zero values with default values. +func (c *Writer2Config) fill() { + if c.Properties == nil { + c.Properties = &Properties{LC: 3, LP: 0, PB: 2} + } + if c.DictCap == 0 { + c.DictCap = 8 * 1024 * 1024 + } + if c.BufSize == 0 { + c.BufSize = 4096 + } +} + +// Verify checks the Writer2Config for correctness. Zero values will be +// replaced by default values. +func (c *Writer2Config) Verify() error { + c.fill() + var err error + if c == nil { + return errors.New("lzma: WriterConfig is nil") + } + if c.Properties == nil { + return errors.New("lzma: WriterConfig has no Properties set") + } + if err = c.Properties.verify(); err != nil { + return err + } + if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) { + return errors.New("lzma: dictionary capacity is out of range") + } + if !(maxMatchLen <= c.BufSize) { + return errors.New("lzma: lookahead buffer size too small") + } + if c.Properties.LC+c.Properties.LP > 4 { + return errors.New("lzma: sum of lc and lp exceeds 4") + } + if err = c.Matcher.verify(); err != nil { + return err + } + return nil +} + +// Writer2 supports the creation of an LZMA2 stream. But note that +// written data is buffered, so call Flush or Close to write data to the +// underlying writer. The Close method writes the end-of-stream marker +// to the stream. So you may be able to concatenate the output of two +// writers as long the output of the first writer has only been flushed +// but not closed. +// +// Any change to the fields Properties, DictCap must be done before the +// first call to Write, Flush or Close. +type Writer2 struct { + w io.Writer + + start *state + encoder *encoder + + cstate chunkState + ctype chunkType + + buf bytes.Buffer + lbw LimitedByteWriter +} + +// NewWriter2 creates an LZMA2 chunk sequence writer with the default +// parameters and options. +func NewWriter2(lzma2 io.Writer) (w *Writer2, err error) { + return Writer2Config{}.NewWriter2(lzma2) +} + +// NewWriter2 creates a new LZMA2 writer using the given configuration. +func (c Writer2Config) NewWriter2(lzma2 io.Writer) (w *Writer2, err error) { + if err = c.Verify(); err != nil { + return nil, err + } + w = &Writer2{ + w: lzma2, + start: newState(*c.Properties), + cstate: start, + ctype: start.defaultChunkType(), + } + w.buf.Grow(maxCompressed) + w.lbw = LimitedByteWriter{BW: &w.buf, N: maxCompressed} + m, err := c.Matcher.new(c.DictCap) + if err != nil { + return nil, err + } + d, err := newEncoderDict(c.DictCap, c.BufSize, m) + if err != nil { + return nil, err + } + w.encoder, err = newEncoder(&w.lbw, cloneState(w.start), d, 0) + if err != nil { + return nil, err + } + return w, nil +} + +// written returns the number of bytes written to the current chunk +func (w *Writer2) written() int { + if w.encoder == nil { + return 0 + } + return int(w.encoder.Compressed()) + w.encoder.dict.Buffered() +} + +// errClosed indicates that the writer is closed. +var errClosed = errors.New("lzma: writer closed") + +// Writes data to LZMA2 stream. Note that written data will be buffered. +// Use Flush or Close to ensure that data is written to the underlying +// writer. +func (w *Writer2) Write(p []byte) (n int, err error) { + if w.cstate == stop { + return 0, errClosed + } + for n < len(p) { + m := maxUncompressed - w.written() + if m <= 0 { + panic("lzma: maxUncompressed reached") + } + var q []byte + if n+m < len(p) { + q = p[n : n+m] + } else { + q = p[n:] + } + k, err := w.encoder.Write(q) + n += k + if err != nil && err != ErrLimit { + return n, err + } + if err == ErrLimit || k == m { + if err = w.flushChunk(); err != nil { + return n, err + } + } + } + return n, nil +} + +// writeUncompressedChunk writes an uncompressed chunk to the LZMA2 +// stream. +func (w *Writer2) writeUncompressedChunk() error { + u := w.encoder.Compressed() + if u <= 0 { + return errors.New("lzma: can't write empty uncompressed chunk") + } + if u > maxUncompressed { + panic("overrun of uncompressed data limit") + } + switch w.ctype { + case cLRND: + w.ctype = cUD + default: + w.ctype = cU + } + w.encoder.state = w.start + + header := chunkHeader{ + ctype: w.ctype, + uncompressed: uint32(u - 1), + } + hdata, err := header.MarshalBinary() + if err != nil { + return err + } + if _, err = w.w.Write(hdata); err != nil { + return err + } + _, err = w.encoder.dict.CopyN(w.w, int(u)) + return err +} + +// writeCompressedChunk writes a compressed chunk to the underlying +// writer. +func (w *Writer2) writeCompressedChunk() error { + if w.ctype == cU || w.ctype == cUD { + panic("chunk type uncompressed") + } + + u := w.encoder.Compressed() + if u <= 0 { + return errors.New("writeCompressedChunk: empty chunk") + } + if u > maxUncompressed { + panic("overrun of uncompressed data limit") + } + c := w.buf.Len() + if c <= 0 { + panic("no compressed data") + } + if c > maxCompressed { + panic("overrun of compressed data limit") + } + header := chunkHeader{ + ctype: w.ctype, + uncompressed: uint32(u - 1), + compressed: uint16(c - 1), + props: w.encoder.state.Properties, + } + hdata, err := header.MarshalBinary() + if err != nil { + return err + } + if _, err = w.w.Write(hdata); err != nil { + return err + } + _, err = io.Copy(w.w, &w.buf) + return err +} + +// writes a single chunk to the underlying writer. +func (w *Writer2) writeChunk() error { + u := int(uncompressedHeaderLen + w.encoder.Compressed()) + c := headerLen(w.ctype) + w.buf.Len() + if u < c { + return w.writeUncompressedChunk() + } + return w.writeCompressedChunk() +} + +// flushChunk terminates the current chunk. The encoder will be reset +// to support the next chunk. +func (w *Writer2) flushChunk() error { + if w.written() == 0 { + return nil + } + var err error + if err = w.encoder.Close(); err != nil { + return err + } + if err = w.writeChunk(); err != nil { + return err + } + w.buf.Reset() + w.lbw.N = maxCompressed + if err = w.encoder.Reopen(&w.lbw); err != nil { + return err + } + if err = w.cstate.next(w.ctype); err != nil { + return err + } + w.ctype = w.cstate.defaultChunkType() + w.start = cloneState(w.encoder.state) + return nil +} + +// Flush writes all buffered data out to the underlying stream. This +// could result in multiple chunks to be created. +func (w *Writer2) Flush() error { + if w.cstate == stop { + return errClosed + } + for w.written() > 0 { + if err := w.flushChunk(); err != nil { + return err + } + } + return nil +} + +// Close terminates the LZMA2 stream with an EOS chunk. +func (w *Writer2) Close() error { + if w.cstate == stop { + return errClosed + } + if err := w.Flush(); err != nil { + return nil + } + // write zero byte EOS chunk + _, err := w.w.Write([]byte{0}) + if err != nil { + return err + } + w.cstate = stop + return nil +} diff --git a/vendor/github.com/ulikunitz/xz/lzmafilter.go b/vendor/github.com/ulikunitz/xz/lzmafilter.go new file mode 100644 index 000000000..6f4aa2c09 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzmafilter.go @@ -0,0 +1,117 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xz + +import ( + "errors" + "fmt" + "io" + + "github.com/ulikunitz/xz/lzma" +) + +// LZMA filter constants. +const ( + lzmaFilterID = 0x21 + lzmaFilterLen = 3 +) + +// lzmaFilter declares the LZMA2 filter information stored in an xz +// block header. +type lzmaFilter struct { + dictCap int64 +} + +// String returns a representation of the LZMA filter. +func (f lzmaFilter) String() string { + return fmt.Sprintf("LZMA dict cap %#x", f.dictCap) +} + +// id returns the ID for the LZMA2 filter. +func (f lzmaFilter) id() uint64 { return lzmaFilterID } + +// MarshalBinary converts the lzmaFilter in its encoded representation. +func (f lzmaFilter) MarshalBinary() (data []byte, err error) { + c := lzma.EncodeDictCap(f.dictCap) + return []byte{lzmaFilterID, 1, c}, nil +} + +// UnmarshalBinary unmarshals the given data representation of the LZMA2 +// filter. +func (f *lzmaFilter) UnmarshalBinary(data []byte) error { + if len(data) != lzmaFilterLen { + return errors.New("xz: data for LZMA2 filter has wrong length") + } + if data[0] != lzmaFilterID { + return errors.New("xz: wrong LZMA2 filter id") + } + if data[1] != 1 { + return errors.New("xz: wrong LZMA2 filter size") + } + dc, err := lzma.DecodeDictCap(data[2]) + if err != nil { + return errors.New("xz: wrong LZMA2 dictionary size property") + } + + f.dictCap = dc + return nil +} + +// reader creates a new reader for the LZMA2 filter. +func (f lzmaFilter) reader(r io.Reader, c *ReaderConfig) (fr io.Reader, + err error) { + + config := new(lzma.Reader2Config) + if c != nil { + config.DictCap = c.DictCap + } + dc := int(f.dictCap) + if dc < 1 { + return nil, errors.New("xz: LZMA2 filter parameter " + + "dictionary capacity overflow") + } + if dc > config.DictCap { + config.DictCap = dc + } + + fr, err = config.NewReader2(r) + if err != nil { + return nil, err + } + return fr, nil +} + +// writeCloser creates a io.WriteCloser for the LZMA2 filter. +func (f lzmaFilter) writeCloser(w io.WriteCloser, c *WriterConfig, +) (fw io.WriteCloser, err error) { + config := new(lzma.Writer2Config) + if c != nil { + *config = lzma.Writer2Config{ + Properties: c.Properties, + DictCap: c.DictCap, + BufSize: c.BufSize, + Matcher: c.Matcher, + } + } + + dc := int(f.dictCap) + if dc < 1 { + return nil, errors.New("xz: LZMA2 filter parameter " + + "dictionary capacity overflow") + } + if dc > config.DictCap { + config.DictCap = dc + } + + fw, err = config.NewWriter2(w) + if err != nil { + return nil, err + } + return fw, nil +} + +// last returns true, because an LZMA2 filter must be the last filter in +// the filter list. +func (f lzmaFilter) last() bool { return true } diff --git a/vendor/github.com/ulikunitz/xz/make-docs b/vendor/github.com/ulikunitz/xz/make-docs new file mode 100644 index 000000000..a8c612ce1 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/make-docs @@ -0,0 +1,5 @@ +#!/bin/sh + +set -x +pandoc -t html5 -f markdown -s --css=doc/md.css -o README.html README.md +pandoc -t html5 -f markdown -s --css=doc/md.css -o TODO.html TODO.md diff --git a/vendor/github.com/ulikunitz/xz/none-check.go b/vendor/github.com/ulikunitz/xz/none-check.go new file mode 100644 index 000000000..e12d8e476 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/none-check.go @@ -0,0 +1,23 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xz + +import "hash" + +type noneHash struct{} + +func (h noneHash) Write(p []byte) (n int, err error) { return len(p), nil } + +func (h noneHash) Sum(b []byte) []byte { return b } + +func (h noneHash) Reset() {} + +func (h noneHash) Size() int { return 0 } + +func (h noneHash) BlockSize() int { return 0 } + +func newNoneHash() hash.Hash { + return &noneHash{} +} diff --git a/vendor/github.com/ulikunitz/xz/reader.go b/vendor/github.com/ulikunitz/xz/reader.go new file mode 100644 index 000000000..22cd6d500 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/reader.go @@ -0,0 +1,377 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package xz supports the compression and decompression of xz files. It +// supports version 1.0.4 of the specification without the non-LZMA2 +// filters. See http://tukaani.org/xz/xz-file-format-1.0.4.txt +package xz + +import ( + "bytes" + "errors" + "fmt" + "hash" + "io" + + "github.com/ulikunitz/xz/internal/xlog" + "github.com/ulikunitz/xz/lzma" +) + +// ReaderConfig defines the parameters for the xz reader. The +// SingleStream parameter requests the reader to assume that the +// underlying stream contains only a single stream. +type ReaderConfig struct { + DictCap int + SingleStream bool +} + +// fill replaces all zero values with their default values. +func (c *ReaderConfig) fill() { + if c.DictCap == 0 { + c.DictCap = 8 * 1024 * 1024 + } +} + +// Verify checks the reader parameters for Validity. Zero values will be +// replaced by default values. +func (c *ReaderConfig) Verify() error { + if c == nil { + return errors.New("xz: reader parameters are nil") + } + lc := lzma.Reader2Config{DictCap: c.DictCap} + if err := lc.Verify(); err != nil { + return err + } + return nil +} + +// Reader supports the reading of one or multiple xz streams. +type Reader struct { + ReaderConfig + + xz io.Reader + sr *streamReader +} + +// streamReader decodes a single xz stream +type streamReader struct { + ReaderConfig + + xz io.Reader + br *blockReader + newHash func() hash.Hash + h header + index []record +} + +// NewReader creates a new xz reader using the default parameters. +// The function reads and checks the header of the first XZ stream. The +// reader will process multiple streams including padding. +func NewReader(xz io.Reader) (r *Reader, err error) { + return ReaderConfig{}.NewReader(xz) +} + +// NewReader creates an xz stream reader. The created reader will be +// able to process multiple streams and padding unless a SingleStream +// has been set in the reader configuration c. +func (c ReaderConfig) NewReader(xz io.Reader) (r *Reader, err error) { + if err = c.Verify(); err != nil { + return nil, err + } + r = &Reader{ + ReaderConfig: c, + xz: xz, + } + if r.sr, err = c.newStreamReader(xz); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return nil, err + } + return r, nil +} + +var errUnexpectedData = errors.New("xz: unexpected data after stream") + +// Read reads uncompressed data from the stream. +func (r *Reader) Read(p []byte) (n int, err error) { + for n < len(p) { + if r.sr == nil { + if r.SingleStream { + data := make([]byte, 1) + _, err = io.ReadFull(r.xz, data) + if err != io.EOF { + return n, errUnexpectedData + } + return n, io.EOF + } + for { + r.sr, err = r.ReaderConfig.newStreamReader(r.xz) + if err != errPadding { + break + } + } + if err != nil { + return n, err + } + } + k, err := r.sr.Read(p[n:]) + n += k + if err != nil { + if err == io.EOF { + r.sr = nil + continue + } + return n, err + } + } + return n, nil +} + +var errPadding = errors.New("xz: padding (4 zero bytes) encountered") + +// newStreamReader creates a new xz stream reader using the given configuration +// parameters. NewReader reads and checks the header of the xz stream. +func (c ReaderConfig) newStreamReader(xz io.Reader) (r *streamReader, err error) { + if err = c.Verify(); err != nil { + return nil, err + } + data := make([]byte, HeaderLen) + if _, err := io.ReadFull(xz, data[:4]); err != nil { + return nil, err + } + if bytes.Equal(data[:4], []byte{0, 0, 0, 0}) { + return nil, errPadding + } + if _, err = io.ReadFull(xz, data[4:]); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return nil, err + } + r = &streamReader{ + ReaderConfig: c, + xz: xz, + index: make([]record, 0, 4), + } + if err = r.h.UnmarshalBinary(data); err != nil { + return nil, err + } + xlog.Debugf("xz header %s", r.h) + if r.newHash, err = newHashFunc(r.h.flags); err != nil { + return nil, err + } + return r, nil +} + +// errIndex indicates an error with the xz file index. +var errIndex = errors.New("xz: error in xz file index") + +// readTail reads the index body and the xz footer. +func (r *streamReader) readTail() error { + index, n, err := readIndexBody(r.xz) + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return err + } + if len(index) != len(r.index) { + return fmt.Errorf("xz: index length is %d; want %d", + len(index), len(r.index)) + } + for i, rec := range r.index { + if rec != index[i] { + return fmt.Errorf("xz: record %d is %v; want %v", + i, rec, index[i]) + } + } + + p := make([]byte, footerLen) + if _, err = io.ReadFull(r.xz, p); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return err + } + var f footer + if err = f.UnmarshalBinary(p); err != nil { + return err + } + xlog.Debugf("xz footer %s", f) + if f.flags != r.h.flags { + return errors.New("xz: footer flags incorrect") + } + if f.indexSize != int64(n)+1 { + return errors.New("xz: index size in footer wrong") + } + return nil +} + +// Read reads actual data from the xz stream. +func (r *streamReader) Read(p []byte) (n int, err error) { + for n < len(p) { + if r.br == nil { + bh, hlen, err := readBlockHeader(r.xz) + if err != nil { + if err == errIndexIndicator { + if err = r.readTail(); err != nil { + return n, err + } + return n, io.EOF + } + return n, err + } + xlog.Debugf("block %v", *bh) + r.br, err = r.ReaderConfig.newBlockReader(r.xz, bh, + hlen, r.newHash()) + if err != nil { + return n, err + } + } + k, err := r.br.Read(p[n:]) + n += k + if err != nil { + if err == io.EOF { + r.index = append(r.index, r.br.record()) + r.br = nil + } else { + return n, err + } + } + } + return n, nil +} + +// countingReader is a reader that counts the bytes read. +type countingReader struct { + r io.Reader + n int64 +} + +// Read reads data from the wrapped reader and adds it to the n field. +func (lr *countingReader) Read(p []byte) (n int, err error) { + n, err = lr.r.Read(p) + lr.n += int64(n) + return n, err +} + +// blockReader supports the reading of a block. +type blockReader struct { + lxz countingReader + header *blockHeader + headerLen int + n int64 + hash hash.Hash + r io.Reader + err error +} + +// newBlockReader creates a new block reader. +func (c *ReaderConfig) newBlockReader(xz io.Reader, h *blockHeader, + hlen int, hash hash.Hash) (br *blockReader, err error) { + + br = &blockReader{ + lxz: countingReader{r: xz}, + header: h, + headerLen: hlen, + hash: hash, + } + + fr, err := c.newFilterReader(&br.lxz, h.filters) + if err != nil { + return nil, err + } + if br.hash.Size() != 0 { + br.r = io.TeeReader(fr, br.hash) + } else { + br.r = fr + } + + return br, nil +} + +// uncompressedSize returns the uncompressed size of the block. +func (br *blockReader) uncompressedSize() int64 { + return br.n +} + +// compressedSize returns the compressed size of the block. +func (br *blockReader) compressedSize() int64 { + return br.lxz.n +} + +// unpaddedSize computes the unpadded size for the block. +func (br *blockReader) unpaddedSize() int64 { + n := int64(br.headerLen) + n += br.compressedSize() + n += int64(br.hash.Size()) + return n +} + +// record returns the index record for the current block. +func (br *blockReader) record() record { + return record{br.unpaddedSize(), br.uncompressedSize()} +} + +// errBlockSize indicates that the size of the block in the block header +// is wrong. +var errBlockSize = errors.New("xz: wrong uncompressed size for block") + +// Read reads data from the block. +func (br *blockReader) Read(p []byte) (n int, err error) { + n, err = br.r.Read(p) + br.n += int64(n) + + u := br.header.uncompressedSize + if u >= 0 && br.uncompressedSize() > u { + return n, errors.New("xz: wrong uncompressed size for block") + } + c := br.header.compressedSize + if c >= 0 && br.compressedSize() > c { + return n, errors.New("xz: wrong compressed size for block") + } + if err != io.EOF { + return n, err + } + if br.uncompressedSize() < u || br.compressedSize() < c { + return n, io.ErrUnexpectedEOF + } + + s := br.hash.Size() + k := padLen(br.lxz.n) + q := make([]byte, k+s, k+2*s) + if _, err = io.ReadFull(br.lxz.r, q); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return n, err + } + if !allZeros(q[:k]) { + return n, errors.New("xz: non-zero block padding") + } + checkSum := q[k:] + computedSum := br.hash.Sum(checkSum[s:]) + if !bytes.Equal(checkSum, computedSum) { + return n, errors.New("xz: checksum error for block") + } + return n, io.EOF +} + +func (c *ReaderConfig) newFilterReader(r io.Reader, f []filter) (fr io.Reader, + err error) { + + if err = verifyFilters(f); err != nil { + return nil, err + } + + fr = r + for i := len(f) - 1; i >= 0; i-- { + fr, err = f[i].reader(fr, c) + if err != nil { + return nil, err + } + } + return fr, nil +} diff --git a/vendor/github.com/ulikunitz/xz/writer.go b/vendor/github.com/ulikunitz/xz/writer.go new file mode 100644 index 000000000..aec10dfa6 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/writer.go @@ -0,0 +1,395 @@ +// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xz + +import ( + "errors" + "hash" + "io" + + "github.com/ulikunitz/xz/lzma" +) + +// WriterConfig describe the parameters for an xz writer. +type WriterConfig struct { + Properties *lzma.Properties + DictCap int + BufSize int + BlockSize int64 + // checksum method: CRC32, CRC64 or SHA256 (default: CRC64) + CheckSum byte + // Forces NoChecksum (default: false) + NoCheckSum bool + // match algorithm + Matcher lzma.MatchAlgorithm +} + +// fill replaces zero values with default values. +func (c *WriterConfig) fill() { + if c.Properties == nil { + c.Properties = &lzma.Properties{LC: 3, LP: 0, PB: 2} + } + if c.DictCap == 0 { + c.DictCap = 8 * 1024 * 1024 + } + if c.BufSize == 0 { + c.BufSize = 4096 + } + if c.BlockSize == 0 { + c.BlockSize = maxInt64 + } + if c.CheckSum == 0 { + c.CheckSum = CRC64 + } + if c.NoCheckSum { + c.CheckSum = None + } +} + +// Verify checks the configuration for errors. Zero values will be +// replaced by default values. +func (c *WriterConfig) Verify() error { + if c == nil { + return errors.New("xz: writer configuration is nil") + } + c.fill() + lc := lzma.Writer2Config{ + Properties: c.Properties, + DictCap: c.DictCap, + BufSize: c.BufSize, + Matcher: c.Matcher, + } + if err := lc.Verify(); err != nil { + return err + } + if c.BlockSize <= 0 { + return errors.New("xz: block size out of range") + } + if err := verifyFlags(c.CheckSum); err != nil { + return err + } + return nil +} + +// filters creates the filter list for the given parameters. +func (c *WriterConfig) filters() []filter { + return []filter{&lzmaFilter{int64(c.DictCap)}} +} + +// maxInt64 defines the maximum 64-bit signed integer. +const maxInt64 = 1<<63 - 1 + +// verifyFilters checks the filter list for the length and the right +// sequence of filters. +func verifyFilters(f []filter) error { + if len(f) == 0 { + return errors.New("xz: no filters") + } + if len(f) > 4 { + return errors.New("xz: more than four filters") + } + for _, g := range f[:len(f)-1] { + if g.last() { + return errors.New("xz: last filter is not last") + } + } + if !f[len(f)-1].last() { + return errors.New("xz: wrong last filter") + } + return nil +} + +// newFilterWriteCloser converts a filter list into a WriteCloser that +// can be used by a blockWriter. +func (c *WriterConfig) newFilterWriteCloser(w io.Writer, f []filter) (fw io.WriteCloser, err error) { + if err = verifyFilters(f); err != nil { + return nil, err + } + fw = nopWriteCloser(w) + for i := len(f) - 1; i >= 0; i-- { + fw, err = f[i].writeCloser(fw, c) + if err != nil { + return nil, err + } + } + return fw, nil +} + +// nopWCloser implements a WriteCloser with a Close method not doing +// anything. +type nopWCloser struct { + io.Writer +} + +// Close returns nil and doesn't do anything else. +func (c nopWCloser) Close() error { + return nil +} + +// nopWriteCloser converts the Writer into a WriteCloser with a Close +// function that does nothing beside returning nil. +func nopWriteCloser(w io.Writer) io.WriteCloser { + return nopWCloser{w} +} + +// Writer compresses data written to it. It is an io.WriteCloser. +type Writer struct { + WriterConfig + + xz io.Writer + bw *blockWriter + newHash func() hash.Hash + h header + index []record + closed bool +} + +// newBlockWriter creates a new block writer writes the header out. +func (w *Writer) newBlockWriter() error { + var err error + w.bw, err = w.WriterConfig.newBlockWriter(w.xz, w.newHash()) + if err != nil { + return err + } + if err = w.bw.writeHeader(w.xz); err != nil { + return err + } + return nil +} + +// closeBlockWriter closes a block writer and records the sizes in the +// index. +func (w *Writer) closeBlockWriter() error { + var err error + if err = w.bw.Close(); err != nil { + return err + } + w.index = append(w.index, w.bw.record()) + return nil +} + +// NewWriter creates a new xz writer using default parameters. +func NewWriter(xz io.Writer) (w *Writer, err error) { + return WriterConfig{}.NewWriter(xz) +} + +// NewWriter creates a new Writer using the given configuration parameters. +func (c WriterConfig) NewWriter(xz io.Writer) (w *Writer, err error) { + if err = c.Verify(); err != nil { + return nil, err + } + w = &Writer{ + WriterConfig: c, + xz: xz, + h: header{c.CheckSum}, + index: make([]record, 0, 4), + } + if w.newHash, err = newHashFunc(c.CheckSum); err != nil { + return nil, err + } + data, err := w.h.MarshalBinary() + if _, err = xz.Write(data); err != nil { + return nil, err + } + if err = w.newBlockWriter(); err != nil { + return nil, err + } + return w, nil + +} + +// Write compresses the uncompressed data provided. +func (w *Writer) Write(p []byte) (n int, err error) { + if w.closed { + return 0, errClosed + } + for { + k, err := w.bw.Write(p[n:]) + n += k + if err != errNoSpace { + return n, err + } + if err = w.closeBlockWriter(); err != nil { + return n, err + } + if err = w.newBlockWriter(); err != nil { + return n, err + } + } +} + +// Close closes the writer and adds the footer to the Writer. Close +// doesn't close the underlying writer. +func (w *Writer) Close() error { + if w.closed { + return errClosed + } + w.closed = true + var err error + if err = w.closeBlockWriter(); err != nil { + return err + } + + f := footer{flags: w.h.flags} + if f.indexSize, err = writeIndex(w.xz, w.index); err != nil { + return err + } + data, err := f.MarshalBinary() + if err != nil { + return err + } + if _, err = w.xz.Write(data); err != nil { + return err + } + return nil +} + +// countingWriter is a writer that counts all data written to it. +type countingWriter struct { + w io.Writer + n int64 +} + +// Write writes data to the countingWriter. +func (cw *countingWriter) Write(p []byte) (n int, err error) { + n, err = cw.w.Write(p) + cw.n += int64(n) + if err == nil && cw.n < 0 { + return n, errors.New("xz: counter overflow") + } + return +} + +// blockWriter is writes a single block. +type blockWriter struct { + cxz countingWriter + // mw combines io.WriteCloser w and the hash. + mw io.Writer + w io.WriteCloser + n int64 + blockSize int64 + closed bool + headerLen int + + filters []filter + hash hash.Hash +} + +// newBlockWriter creates a new block writer. +func (c *WriterConfig) newBlockWriter(xz io.Writer, hash hash.Hash) (bw *blockWriter, err error) { + bw = &blockWriter{ + cxz: countingWriter{w: xz}, + blockSize: c.BlockSize, + filters: c.filters(), + hash: hash, + } + bw.w, err = c.newFilterWriteCloser(&bw.cxz, bw.filters) + if err != nil { + return nil, err + } + if bw.hash.Size() != 0 { + bw.mw = io.MultiWriter(bw.w, bw.hash) + } else { + bw.mw = bw.w + } + return bw, nil +} + +// writeHeader writes the header. If the function is called after Close +// the commpressedSize and uncompressedSize fields will be filled. +func (bw *blockWriter) writeHeader(w io.Writer) error { + h := blockHeader{ + compressedSize: -1, + uncompressedSize: -1, + filters: bw.filters, + } + if bw.closed { + h.compressedSize = bw.compressedSize() + h.uncompressedSize = bw.uncompressedSize() + } + data, err := h.MarshalBinary() + if err != nil { + return err + } + if _, err = w.Write(data); err != nil { + return err + } + bw.headerLen = len(data) + return nil +} + +// compressed size returns the amount of data written to the underlying +// stream. +func (bw *blockWriter) compressedSize() int64 { + return bw.cxz.n +} + +// uncompressedSize returns the number of data written to the +// blockWriter +func (bw *blockWriter) uncompressedSize() int64 { + return bw.n +} + +// unpaddedSize returns the sum of the header length, the uncompressed +// size of the block and the hash size. +func (bw *blockWriter) unpaddedSize() int64 { + if bw.headerLen <= 0 { + panic("xz: block header not written") + } + n := int64(bw.headerLen) + n += bw.compressedSize() + n += int64(bw.hash.Size()) + return n +} + +// record returns the record for the current stream. Call Close before +// calling this method. +func (bw *blockWriter) record() record { + return record{bw.unpaddedSize(), bw.uncompressedSize()} +} + +var errClosed = errors.New("xz: writer already closed") + +var errNoSpace = errors.New("xz: no space") + +// Write writes uncompressed data to the block writer. +func (bw *blockWriter) Write(p []byte) (n int, err error) { + if bw.closed { + return 0, errClosed + } + + t := bw.blockSize - bw.n + if int64(len(p)) > t { + err = errNoSpace + p = p[:t] + } + + var werr error + n, werr = bw.mw.Write(p) + bw.n += int64(n) + if werr != nil { + return n, werr + } + return n, err +} + +// Close closes the writer. +func (bw *blockWriter) Close() error { + if bw.closed { + return errClosed + } + bw.closed = true + if err := bw.w.Close(); err != nil { + return err + } + s := bw.hash.Size() + k := padLen(bw.cxz.n) + p := make([]byte, k+s) + bw.hash.Sum(p[k:k]) + if _, err := bw.cxz.w.Write(p); err != nil { + return err + } + return nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 0ce1bcfac..f082e9ccf 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -9,8 +9,6 @@ github.com/Azure/azure-pipeline-go/pipeline # github.com/Azure/azure-storage-blob-go v0.8.0 github.com/Azure/azure-storage-blob-go/azblob # github.com/Unknwon/goconfig v0.0.0-20191126170842-860a72fb44fd -# github.com/OneOfOne/xxhash v1.2.7 -github.com/OneOfOne/xxhash github.com/Unknwon/goconfig # github.com/a8m/tree v0.0.0-20181222104329-6a0b80129de4 github.com/a8m/tree @@ -70,6 +68,8 @@ github.com/aws/aws-sdk-go/service/sts/stsiface github.com/beorn7/perks/quantile # github.com/billziss-gh/cgofuse v1.2.0 github.com/billziss-gh/cgofuse/fuse +# github.com/buengese/xxh32 v1.0.1 +github.com/buengese/xxh32 # github.com/cespare/xxhash/v2 v2.1.1 github.com/cespare/xxhash/v2 # github.com/cpuguy83/go-md2man/v2 v2.0.0 @@ -92,13 +92,11 @@ github.com/dropbox/dropbox-sdk-go-unofficial/dropbox/team_common github.com/dropbox/dropbox-sdk-go-unofficial/dropbox/team_policies github.com/dropbox/dropbox-sdk-go-unofficial/dropbox/users github.com/dropbox/dropbox-sdk-go-unofficial/dropbox/users_common -# github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e -# github.com/etcd-io/bbolt v1.3.3 -github.com/etcd-io/bbolt # github.com/gabriel-vasile/mimetype v1.0.2 github.com/gabriel-vasile/mimetype github.com/gabriel-vasile/mimetype/internal/json github.com/gabriel-vasile/mimetype/internal/matchers +# github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e github.com/golang/groupcache/lru # github.com/golang/protobuf v1.3.3 github.com/golang/protobuf/proto @@ -106,8 +104,6 @@ github.com/golang/protobuf/ptypes github.com/golang/protobuf/ptypes/any github.com/golang/protobuf/ptypes/duration github.com/golang/protobuf/ptypes/timestamp -# github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db -github.com/golang/snappy # github.com/google/go-querystring v1.0.0 github.com/google/go-querystring/query # github.com/googleapis/gax-go/v2 v2.0.5 @@ -118,8 +114,6 @@ github.com/hanwen/go-fuse/v2/fuse github.com/hanwen/go-fuse/v2/internal github.com/hanwen/go-fuse/v2/internal/utimens github.com/hanwen/go-fuse/v2/splice -# github.com/id01/go-lz4 v1.0.3 -github.com/id01/go-lz4 # github.com/inconshreveable/mousetrap v1.0.0 github.com/inconshreveable/mousetrap # github.com/jlaffaye/ftp v0.0.0-20191218041957-e1b8fdd0dcc3 @@ -130,6 +124,9 @@ github.com/jmespath/go-jmespath github.com/jzelinskie/whirlpool # github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 github.com/kardianos/osext +# github.com/klauspost/compress v1.10.1 +github.com/klauspost/compress/flate +github.com/klauspost/compress/gzip # github.com/konsorten/go-windows-terminal-sequences v1.0.2 github.com/konsorten/go-windows-terminal-sequences # github.com/koofr/go-httpclient v0.0.0-20190818202018-e0dc8fd921dc @@ -166,6 +163,9 @@ github.com/pengsrc/go-shared/check github.com/pengsrc/go-shared/convert github.com/pengsrc/go-shared/log github.com/pengsrc/go-shared/reopen +# github.com/pierrec/lz4 v2.4.1+incompatible +github.com/pierrec/lz4 +github.com/pierrec/lz4/internal/xxh32 # github.com/pkg/errors v0.9.1 github.com/pkg/errors # github.com/pkg/sftp v1.11.0 @@ -210,6 +210,11 @@ github.com/stretchr/testify/assert github.com/stretchr/testify/require # github.com/t3rm1n4l/go-mega v0.0.0-20200117211730-79a813bb328d github.com/t3rm1n4l/go-mega +# github.com/ulikunitz/xz v0.5.7 +github.com/ulikunitz/xz +github.com/ulikunitz/xz/internal/hash +github.com/ulikunitz/xz/internal/xlog +github.com/ulikunitz/xz/lzma # github.com/xanzy/ssh-agent v0.2.1 github.com/xanzy/ssh-agent # github.com/youmark/pkcs8 v0.0.0-20191102193632-94c173a94d60