lib: Add file name compression
Allows compressing short arbitrary strings, returning the result as a string using base64 URL encoding. A generator for the tables is included and a few sample tables have been added; add more in init.go. Tested with fuzzing for crash resistance and round-trip symmetry, see fuzz.go.
This commit is contained in:
parent
770a6f2cad
commit
cb7534dcdf
8 changed files with 363 additions and 4 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -9,3 +9,4 @@ rclone.iml
|
||||||
*.test
|
*.test
|
||||||
*.log
|
*.log
|
||||||
*.iml
|
*.iml
|
||||||
|
fuzz-build.zip
|
||||||
|
|
1
go.mod
1
go.mod
|
@ -25,6 +25,7 @@ require (
|
||||||
github.com/jlaffaye/ftp v0.0.0-20200720194710-13949d38913e
|
github.com/jlaffaye/ftp v0.0.0-20200720194710-13949d38913e
|
||||||
github.com/jzelinskie/whirlpool v0.0.0-20170603002051-c19460b8caa6
|
github.com/jzelinskie/whirlpool v0.0.0-20170603002051-c19460b8caa6
|
||||||
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
|
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
|
||||||
|
github.com/klauspost/compress v1.10.11
|
||||||
github.com/koofr/go-httpclient v0.0.0-20200420163713-93aa7c75b348
|
github.com/koofr/go-httpclient v0.0.0-20200420163713-93aa7c75b348
|
||||||
github.com/koofr/go-koofrclient v0.0.0-20190724113126-8e5366da203a
|
github.com/koofr/go-koofrclient v0.0.0-20190724113126-8e5366da203a
|
||||||
github.com/mattn/go-colorable v0.1.7
|
github.com/mattn/go-colorable v0.1.7
|
||||||
|
|
6
go.sum
6
go.sum
|
@ -87,10 +87,6 @@ github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24
|
||||||
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
|
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
|
||||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||||
github.com/billziss-gh/cgofuse v1.3.0 h1:mFj8XQg/vvxMFywNy1F7IqFYcMeBqceYTh1+iUhpsk8=
|
|
||||||
github.com/billziss-gh/cgofuse v1.3.0/go.mod h1:LJjoaUojlVjgo5GQoEJTcJNqZJeRU0nCR84CyxKt2YM=
|
|
||||||
github.com/billziss-gh/cgofuse v1.3.1-0.20200703171401-45df47debffe h1:AXqxouOOD7FQuoVfZubWmMyHzOrrSGZbdh9o6PCtfKM=
|
|
||||||
github.com/billziss-gh/cgofuse v1.3.1-0.20200703171401-45df47debffe/go.mod h1:LJjoaUojlVjgo5GQoEJTcJNqZJeRU0nCR84CyxKt2YM=
|
|
||||||
github.com/billziss-gh/cgofuse v1.4.0 h1:kju2jDmdNuDDCrxPob2ggmZr5Mj/odCjU1Y8kx0Th9E=
|
github.com/billziss-gh/cgofuse v1.4.0 h1:kju2jDmdNuDDCrxPob2ggmZr5Mj/odCjU1Y8kx0Th9E=
|
||||||
github.com/billziss-gh/cgofuse v1.4.0/go.mod h1:LJjoaUojlVjgo5GQoEJTcJNqZJeRU0nCR84CyxKt2YM=
|
github.com/billziss-gh/cgofuse v1.4.0/go.mod h1:LJjoaUojlVjgo5GQoEJTcJNqZJeRU0nCR84CyxKt2YM=
|
||||||
github.com/bradfitz/iter v0.0.0-20140124041915-454541ec3da2/go.mod h1:PyRFw1Lt2wKX4ZVSQ2mk+PeDa1rxyObEDlApuIsUKuo=
|
github.com/bradfitz/iter v0.0.0-20140124041915-454541ec3da2/go.mod h1:PyRFw1Lt2wKX4ZVSQ2mk+PeDa1rxyObEDlApuIsUKuo=
|
||||||
|
@ -265,6 +261,8 @@ github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvW
|
||||||
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
|
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
|
||||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||||
github.com/kkdai/bstream v0.0.0-20161212061736-f391b8402d23/go.mod h1:J+Gs4SYgM6CZQHDETBtE9HaSEkGmuNXF86RwHhHUvq4=
|
github.com/kkdai/bstream v0.0.0-20161212061736-f391b8402d23/go.mod h1:J+Gs4SYgM6CZQHDETBtE9HaSEkGmuNXF86RwHhHUvq4=
|
||||||
|
github.com/klauspost/compress v1.10.11 h1:K9z59aO18Aywg2b/WSgBaUX99mHy2BES18Cr5lBKZHk=
|
||||||
|
github.com/klauspost/compress v1.10.11/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8=
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||||
|
|
84
lib/encoder/filename/decode.go
Normal file
84
lib/encoder/filename/decode.go
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
package filename
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/binary"
|
||||||
|
"errors"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/klauspost/compress/huff0"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ErrCorrupted is returned if a provided encoded filename cannot be decoded.
|
||||||
|
var ErrCorrupted = errors.New("file name corrupt")
|
||||||
|
|
||||||
|
// ErrUnsupported is returned if a provided encoding may come from a future version or the file name is corrupt.
|
||||||
|
var ErrUnsupported = errors.New("file name possibly generated by future version of rclone")
|
||||||
|
|
||||||
|
// Custom decoder for tableCustom types. Stateful, so must have lock.
|
||||||
|
var customDec huff0.Scratch
|
||||||
|
var customDecMu sync.Mutex
|
||||||
|
|
||||||
|
// Decode an encoded string.
|
||||||
|
func Decode(s string) (string, error) {
|
||||||
|
if len(s) < 1 {
|
||||||
|
return "", ErrCorrupted
|
||||||
|
}
|
||||||
|
table := decodeMap[s[0]]
|
||||||
|
if table == 0 {
|
||||||
|
return "", ErrCorrupted
|
||||||
|
}
|
||||||
|
table--
|
||||||
|
s = s[1:]
|
||||||
|
|
||||||
|
data := make([]byte, base64.URLEncoding.DecodedLen(len(s)))
|
||||||
|
n, err := base64.URLEncoding.Decode(data, ([]byte)(s))
|
||||||
|
if err != nil || n < 0 {
|
||||||
|
return "", ErrCorrupted
|
||||||
|
}
|
||||||
|
data = data[:n]
|
||||||
|
|
||||||
|
switch table {
|
||||||
|
case tableUncompressed:
|
||||||
|
return string(data), nil
|
||||||
|
case tableReserved:
|
||||||
|
return "", ErrUnsupported
|
||||||
|
case tableRLE:
|
||||||
|
if len(data) < 2 {
|
||||||
|
return "", ErrCorrupted
|
||||||
|
}
|
||||||
|
n, used := binary.Uvarint(data[:len(data)-1])
|
||||||
|
if used <= 0 || n > maxLength {
|
||||||
|
return "", ErrCorrupted
|
||||||
|
}
|
||||||
|
return string(bytes.Repeat(data[len(data)-1:], int(n))), nil
|
||||||
|
case tableCustom:
|
||||||
|
customDecMu.Lock()
|
||||||
|
defer customDecMu.Unlock()
|
||||||
|
_, data, err := huff0.ReadTable(data, &customDec)
|
||||||
|
if err != nil {
|
||||||
|
return "", ErrCorrupted
|
||||||
|
}
|
||||||
|
customDec.MaxDecodedSize = maxLength
|
||||||
|
decoded, err := customDec.Decompress1X(data)
|
||||||
|
if err != nil {
|
||||||
|
return "", ErrCorrupted
|
||||||
|
}
|
||||||
|
return string(decoded), nil
|
||||||
|
default:
|
||||||
|
if table >= byte(len(decTables)) {
|
||||||
|
return "", ErrCorrupted
|
||||||
|
}
|
||||||
|
dec := decTables[table]
|
||||||
|
if dec == nil {
|
||||||
|
return "", ErrUnsupported
|
||||||
|
}
|
||||||
|
var dst [maxLength]byte
|
||||||
|
name, err := dec.Decompress1X(dst[:0], data)
|
||||||
|
if err != nil {
|
||||||
|
return "", ErrCorrupted
|
||||||
|
}
|
||||||
|
return string(name), nil
|
||||||
|
}
|
||||||
|
}
|
60
lib/encoder/filename/encode.go
Normal file
60
lib/encoder/filename/encode.go
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
package filename
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/binary"
|
||||||
|
|
||||||
|
"github.com/klauspost/compress/huff0"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Encode will encode the string and return a base64 (url) compatible version of it.
|
||||||
|
// Calling Decode with the returned string should always succeed.
|
||||||
|
// It is not a requirement that the input string is valid utf-8.
|
||||||
|
func Encode(s string) string {
|
||||||
|
initCoders()
|
||||||
|
bestSize := len(s)
|
||||||
|
bestTable := tableUncompressed
|
||||||
|
org := []byte(s)
|
||||||
|
bestOut := []byte(s)
|
||||||
|
|
||||||
|
// Try all tables and choose the best
|
||||||
|
for i, enc := range encTables[:] {
|
||||||
|
if len(org) <= 1 || len(org) > maxLength {
|
||||||
|
// Use the uncompressed
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if enc == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Try to encode using table.
|
||||||
|
err := func() error {
|
||||||
|
encTableLocks[i].Lock()
|
||||||
|
defer encTableLocks[i].Unlock()
|
||||||
|
out, _, err := huff0.Compress1X(org, enc)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if len(out) < bestSize {
|
||||||
|
bestOut = bestOut[:len(out)]
|
||||||
|
bestTable = i
|
||||||
|
bestSize = len(out)
|
||||||
|
copy(bestOut, out)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}()
|
||||||
|
// If input is a single byte repeated store as RLE or save uncompressed.
|
||||||
|
if err == huff0.ErrUseRLE {
|
||||||
|
if len(org) > 2 {
|
||||||
|
// Encode as one byte repeated since it will be smaller than uncompressed.
|
||||||
|
n := binary.PutUvarint(bestOut, uint64(len(org)))
|
||||||
|
bestOut = bestOut[:n+1]
|
||||||
|
bestOut[n] = org[0]
|
||||||
|
bestSize = n + 1
|
||||||
|
bestTable = tableRLE
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return string(encodeURL[bestTable]) + base64.URLEncoding.EncodeToString(bestOut)
|
||||||
|
}
|
33
lib/encoder/filename/fuzz.go
Normal file
33
lib/encoder/filename/fuzz.go
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
//+build gofuzz
|
||||||
|
|
||||||
|
package filename
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Run like:
|
||||||
|
// go-fuzz-build -o=fuzz-build.zip -func=Fuzz . && go-fuzz -minimize=5s -bin=fuzz-build.zip -workdir=testdata/corpus -procs=24
|
||||||
|
|
||||||
|
// Fuzz test the provided input.
|
||||||
|
func Fuzz(data []byte) int {
|
||||||
|
// First try to decode as is.
|
||||||
|
// We don't care about the result, it just shouldn't crash.
|
||||||
|
Decode(string(data))
|
||||||
|
|
||||||
|
// Now encode
|
||||||
|
enc := Encode(string(data))
|
||||||
|
|
||||||
|
// And decoded must match
|
||||||
|
decoded, err := Decode(enc)
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Sprintf("error decoding %q, input %q: %v", enc, string(data), err))
|
||||||
|
}
|
||||||
|
if !bytes.Equal(data, []byte(decoded)) {
|
||||||
|
panic(fmt.Sprintf("decode mismatch, encoded: %q, org: %q, got: %q", enc, string(data), decoded))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Everything is good.
|
||||||
|
return 1
|
||||||
|
}
|
93
lib/encoder/filename/gentable.go
Normal file
93
lib/encoder/filename/gentable.go
Normal file
|
@ -0,0 +1,93 @@
|
||||||
|
//+build ignore
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/base64"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/klauspost/compress"
|
||||||
|
"github.com/klauspost/compress/huff0"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Replace/add histogram data and execute go run gentable.go
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
// Allow non-represented characters.
|
||||||
|
const omitUnused = false
|
||||||
|
|
||||||
|
histogram := [256]uint64{
|
||||||
|
// ncw home directory
|
||||||
|
//0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19442, 760, 0, 349, 570, 1520, 199, 76, 685, 654, 0, 40377, 1605, 395132, 935270, 0, 1156377, 887730, 811737, 712241, 693240, 689139, 675964, 656417, 666577, 657413, 532, 24, 0, 145, 0, 3, 946, 44932, 37362, 46126, 36752, 76346, 19338, 47457, 14288, 38163, 4350, 7867, 36541, 65011, 30255, 26792, 22097, 1803, 39191, 61965, 76585, 11887, 12896, 5931, 1935, 1731, 1385, 1279, 9, 1278, 1, 420185, 0, 1146359, 746359, 968896, 868703, 1393640, 745019, 354147, 159462, 483979, 169092, 75937, 385858, 322166, 466635, 571268, 447132, 13792, 446484, 736844, 732675, 170232, 112983, 63184, 142357, 173945, 21521, 250, 0, 250, 4140, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 39, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 15, 0, 0, 0, 10, 0, 5, 0, 0, 0, 0, 0, 0, 283, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
//Images:
|
||||||
|
//0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 765, 0, 0, 0, 0, 0, 8, 7, 3, 3, 0, 29, 53, 247265, 83587, 0, 265952, 233552, 229781, 71156, 78374, 65141, 46152, 43767, 55603, 39411, 0, 0, 0, 0, 0, 88, 84, 141, 70, 222, 191, 51, 52, 101, 60, 53, 23, 17, 49, 93, 53, 17, 92, 0, 158, 109, 41, 19, 43, 28, 10, 5, 1, 0, 0, 0, 0, 879, 0, 3415, 6770, 39823, 3566, 2491, 964, 42115, 825, 5178, 40755, 483, 1290, 3294, 1720, 6309, 42983, 10, 37739, 3454, 7028, 5077, 854, 227, 1259, 767, 218, 0, 0, 0, 163, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
// Google Drive:
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459, 0, 0, 7, 0, 0, 0, 7, 1, 1, 0, 2, 1, 506, 706, 0, 3903, 3552, 3694, 3338, 3262, 3257, 3222, 3249, 3325, 3261, 5, 0, 0, 1, 0, 0, 0, 48, 31, 61, 53, 46, 17, 17, 34, 32, 9, 22, 17, 31, 27, 19, 52, 5, 46, 84, 38, 14, 5, 19, 2, 2, 0, 8, 0, 8, 0, 180, 0, 5847, 3282, 3729, 3695, 3842, 3356, 316, 139, 487, 117, 95, 476, 289, 428, 609, 467, 5, 446, 592, 955, 130, 112, 57, 390, 168, 14, 0, 2, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Override with equally distributed characters
|
||||||
|
if false {
|
||||||
|
histogram = [256]uint64{}
|
||||||
|
var chars string
|
||||||
|
// base c64
|
||||||
|
chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
|
||||||
|
// hex
|
||||||
|
//chars = "0123456789abcdef"
|
||||||
|
for _, v := range []byte(chars) {
|
||||||
|
histogram[v] = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sum up distributions
|
||||||
|
var total uint64
|
||||||
|
for _, v := range histogram[:] {
|
||||||
|
total += v
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scale the distribution to approx this size.
|
||||||
|
const scale = 100 << 10
|
||||||
|
var tmp []byte
|
||||||
|
for i, v := range histogram[:] {
|
||||||
|
if v == 0 && omitUnused {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
nf := float64(v) / float64(total) * scale
|
||||||
|
if nf < 1 {
|
||||||
|
nf = 1
|
||||||
|
}
|
||||||
|
t2 := make([]byte, int(math.Ceil(nf)))
|
||||||
|
for j := range t2 {
|
||||||
|
t2[j] = byte(i)
|
||||||
|
}
|
||||||
|
tmp = append(tmp, t2...)
|
||||||
|
}
|
||||||
|
|
||||||
|
var s huff0.Scratch
|
||||||
|
s.Reuse = huff0.ReusePolicyNone
|
||||||
|
_, _, err := huff0.Compress1X(tmp, &s)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
fmt.Println("table:", base64.URLEncoding.EncodeToString(s.OutTable))
|
||||||
|
|
||||||
|
// Encode without ones:
|
||||||
|
s.Reuse = huff0.ReusePolicyPrefer
|
||||||
|
tmp = tmp[:0]
|
||||||
|
for i, v := range histogram[:] {
|
||||||
|
nf := float64(v) / float64(total) * scale
|
||||||
|
t2 := make([]byte, int(math.Ceil(nf)))
|
||||||
|
for j := range t2 {
|
||||||
|
t2[j] = byte(i)
|
||||||
|
}
|
||||||
|
tmp = append(tmp, t2...)
|
||||||
|
}
|
||||||
|
_, _, err = huff0.Compress1X(tmp, &s)
|
||||||
|
fmt.Println("sample", len(tmp), "byte, compressed size:", len(s.OutData))
|
||||||
|
fmt.Println("Shannon limit:", compress.ShannonEntropyBits(tmp)/8, "bytes")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("avg size: 1 -> %.02f", float64(len(s.OutData))/float64(len(tmp)))
|
||||||
|
}
|
89
lib/encoder/filename/init.go
Normal file
89
lib/encoder/filename/init.go
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
package filename
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/base64"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/klauspost/compress/huff0"
|
||||||
|
)
|
||||||
|
|
||||||
|
// encodeURL is base64 url encoding values.
|
||||||
|
const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
|
||||||
|
|
||||||
|
// decodeMap maps each character of encodeURL to its index plus one. All other bytes map to 0, which marks an invalid character.
|
||||||
|
var decodeMap [256]byte
|
||||||
|
|
||||||
|
// maxLength is the maximum length that will be attempted to be compressed.
|
||||||
|
const maxLength = 256
|
||||||
|
|
||||||
|
var (
|
||||||
|
initOnce sync.Once // Used to control init of tables.
|
||||||
|
|
||||||
|
encTables [64]*huff0.Scratch // Encoders.
|
||||||
|
encTableLocks [64]sync.Mutex // Temporary locks for encoders since they are stateful.
|
||||||
|
decTables [64]*huff0.Decoder // Stateless decoders.
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
tableUncompressed = 0
|
||||||
|
tableRLE = 61
|
||||||
|
tableCustom = 62
|
||||||
|
tableReserved = 63
|
||||||
|
)
|
||||||
|
|
||||||
|
// predefined tables as base64 URL encoded string.
|
||||||
|
var tablesData = [64]string{
|
||||||
|
// Uncompressed
|
||||||
|
tableUncompressed: "",
|
||||||
|
// ncw home directory
|
||||||
|
1: "MRDIEtAAMAzDMAzDSjX_ybu0w97bb-L3b2mR-rUl5LXW3lZII43kIDMzM1NXu3okgQs=",
|
||||||
|
// ncw images
|
||||||
|
2: "IhDIAEAA______-Pou_4Sf5z-uS-39MVWjullFLKM7EBECs=",
|
||||||
|
// ncw Google Drive:
|
||||||
|
3: "JxDQAIIBMDMzMwOzbv7nJJCyd_m_9D2llCarnQX33nvvlFKEhUxAAQ==",
|
||||||
|
// Hex
|
||||||
|
4: "ExDoSTD___-tfXfhJ0hKSkryTxU=",
|
||||||
|
// Base64
|
||||||
|
5: "JRDIcQf_______8PgIiIiIgINkggARHlkQwSSCCBxHFYINHdfXI=",
|
||||||
|
|
||||||
|
// Special tables:
|
||||||
|
// Compressed data has its own table.
|
||||||
|
tableCustom: "",
|
||||||
|
// Reserved for extension.
|
||||||
|
tableReserved: "",
|
||||||
|
}
|
||||||
|
|
||||||
|
func initCoders() {
|
||||||
|
initOnce.Do(func() {
|
||||||
|
// Init base 64 decoder.
|
||||||
|
for i, v := range encodeURL {
|
||||||
|
decodeMap[v] = byte(i) + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize encoders and decoders.
|
||||||
|
for i, dataString := range tablesData {
|
||||||
|
if len(dataString) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
data, err := base64.URLEncoding.DecodeString(dataString)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
s, _, err := huff0.ReadTable(data, nil)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// We want to save at least len(in) >> 5
|
||||||
|
s.WantLogLess = 5
|
||||||
|
s.Reuse = huff0.ReusePolicyMust
|
||||||
|
encTables[i] = s
|
||||||
|
decTables[i] = s.Decoder()
|
||||||
|
}
|
||||||
|
// Add custom table type.
|
||||||
|
var s huff0.Scratch
|
||||||
|
s.Reuse = huff0.ReusePolicyNone
|
||||||
|
encTables[tableCustom] = &s
|
||||||
|
decTables[tableCustom] = nil
|
||||||
|
})
|
||||||
|
}
|
Loading…
Reference in a new issue