info: rewrite invalid character test and reporting

This commit is contained in:
Fabian Möller 2019-05-14 17:49:55 +02:00 committed by Nick Craig-Wood
parent c8d3e57418
commit 6ba08b8612
6 changed files with 494 additions and 116 deletions

View file

@ -6,15 +6,21 @@ package info
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"os"
"path"
"regexp"
"sort"
"strconv"
"strings"
"sync"
"time"
"github.com/pkg/errors"
"github.com/rclone/rclone/cmd"
"github.com/rclone/rclone/cmd/info/internal"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/object"
@ -22,28 +28,24 @@ import (
"github.com/spf13/cobra"
)
// position is a bit set describing where in a file name the test string
// is placed.
type position int
// Single-position flags followed by the empty and the full set.
const (
positionMiddle position = 1 << iota // 1
positionLeft // 2
positionRight // 4
positionNone position = 0 // no position
positionAll position = positionRight<<1 - 1 // 7: middle, left and right together
)
// Command line flag values and helpers shared by the checks.
var (
// writeJSON is the value of --write-json; WriteJSON does nothing when it is empty.
writeJSON string
// check* are bound to the --check-* flags.
checkNormalization bool
checkControl bool
checkLength bool
checkStreaming bool
positionList = []position{positionMiddle, positionLeft, positionRight}
// uploadWait is the value of --upload-wait; writeFile sleeps this long after an upload.
uploadWait time.Duration
// The regexps below split a listed file name back into the hex digits and
// the literal test string that checkStringPositions formatted into it.
// (?s) lets "." match newlines.
// positionLeftRe: group 1 is the literal string, group 2 the hex digits.
positionLeftRe = regexp.MustCompile(`(?s)^(.*)-position-left-([[:xdigit:]]+)$`)
// positionMiddleRe: group 1 is the hex digits, group 2 the literal string.
positionMiddleRe = regexp.MustCompile(`(?s)^position-middle-([[:xdigit:]]+)-(.*)-$`)
// positionRightRe: group 1 is the hex digits, group 2 the literal string.
positionRightRe = regexp.MustCompile(`(?s)^position-right-([[:xdigit:]]+)-(.*)$`)
)
// init adds the info command to the root command and declares its flags.
func init() {
	cmd.Root.AddCommand(commandDefintion)

	flags := commandDefintion.Flags()
	flags.StringVarP(&writeJSON, "write-json", "", "", "Write results to file.")
	flags.BoolVarP(&checkNormalization, "check-normalization", "", true, "Check UTF-8 Normalization.")
	flags.BoolVarP(&checkControl, "check-control", "", true, "Check control characters.")
	flags.DurationVarP(&uploadWait, "upload-wait", "", 0, "Wait after writing a file.")
	flags.BoolVarP(&checkLength, "check-length", "", true, "Check max filename length.")
	flags.BoolVarP(&checkStreaming, "check-streaming", "", true, "Check uploads with indeterminate file size.")
}
@ -72,7 +74,8 @@ type results struct {
ctx context.Context
f fs.Fs
mu sync.Mutex
stringNeedsEscaping map[string]position
stringNeedsEscaping map[string]internal.Position
controlResults map[string]internal.ControlResult
maxFileLength int
canWriteUnnormalized bool
canReadUnnormalized bool
@ -84,7 +87,8 @@ func newResults(ctx context.Context, f fs.Fs) *results {
return &results{
ctx: ctx,
f: f,
stringNeedsEscaping: make(map[string]position),
stringNeedsEscaping: make(map[string]internal.Position),
controlResults: make(map[string]internal.ControlResult),
}
}
@ -94,12 +98,14 @@ func (r *results) Print() {
if checkControl {
escape := []string{}
for c, needsEscape := range r.stringNeedsEscaping {
if needsEscape != positionNone {
escape = append(escape, fmt.Sprintf("0x%02X", c))
if needsEscape != internal.PositionNone {
k := strconv.Quote(c)
k = k[1 : len(k)-1]
escape = append(escape, fmt.Sprintf("'%s'", k))
}
}
sort.Strings(escape)
fmt.Printf("stringNeedsEscaping = []byte{\n")
fmt.Printf("stringNeedsEscaping = []rune{\n")
fmt.Printf("\t%s\n", strings.Join(escape, ", "))
fmt.Printf("}\n")
}
@ -116,11 +122,53 @@ func (r *results) Print() {
}
}
// WriteJSON writes the results to a JSON file when requested.
//
// Nothing happens when --write-json was not given. Only the sections whose
// check was enabled are attached to the report. Failures to create, encode
// or close the file are logged, and the success message is only logged
// once the file has been written and closed without error.
func (r *results) WriteJSON() {
	if writeJSON == "" {
		return
	}

	report := internal.InfoReport{
		Remote: r.f.Name(),
	}
	if checkControl {
		report.ControlCharacters = &r.controlResults
	}
	if checkLength {
		report.MaxFileLength = &r.maxFileLength
	}
	if checkNormalization {
		report.CanWriteUnnormalized = &r.canWriteUnnormalized
		report.CanReadUnnormalized = &r.canReadUnnormalized
		report.CanReadRenormalized = &r.canReadRenormalized
	}
	if checkStreaming {
		report.CanStream = &r.canStream
	}

	f, err := os.Create(writeJSON)
	if err != nil {
		fs.Errorf(r.f, "Creating JSON file failed: %s", err)
		return
	}

	enc := json.NewEncoder(f)
	enc.SetIndent("", " ")
	encodeErr := enc.Encode(report)
	// Close before reporting so that a failing close is not lost and the
	// success message really means the file is complete.
	closeErr := f.Close()

	if encodeErr != nil {
		fs.Errorf(r.f, "Writing JSON file failed: %s", encodeErr)
		return
	}
	if closeErr != nil {
		fs.Errorf(r.f, "Closing JSON file failed: %s", closeErr)
		return
	}
	fs.Infof(r.f, "Wrote JSON file: %s", writeJSON)
}
// writeFile writes a file with some random contents
//
// The file is uploaded to r.f under the given path and the object and error
// returned by Put are passed back to the caller.
func (r *results) writeFile(path string) (fs.Object, error) {
// 50 characters of content; checkStringPositions compares the size of the
// object it reads back against 50.
contents := random.String(50)
src := object.NewStaticObjectInfo(path, time.Now(), int64(len(contents)), true, nil, r.f)
return r.f.Put(r.ctx, bytes.NewBufferString(contents), src)
obj, err := r.f.Put(r.ctx, bytes.NewBufferString(contents), src)
// Pause after the upload when --upload-wait is set.
if uploadWait > 0 {
time.Sleep(uploadWait)
}
return obj, err
}
// check whether normalization is enforced and check whether it is
@ -144,45 +192,55 @@ func (r *results) checkUTF8Normalization() {
}
}
// checkStringPositions writes one file per position (middle, left, right)
// whose name contains s, reads each one back and records the outcome.
//
// k is the key the outcome is stored under in r.stringNeedsEscaping and
// r.controlResults; s is the string placed into the file names.
func (r *results) checkStringPositions(s string) {
func (r *results) checkStringPositions(k, s string) {
fs.Infof(r.f, "Writing position file 0x%0X", s)
positionError := positionNone
positionError := internal.PositionNone
res := internal.ControlResult{
Text: s,
WriteError: make(map[internal.Position]string, 3),
GetError: make(map[internal.Position]string, 3),
InList: make(map[internal.Position]internal.Presence, 3),
}
for _, pos := range positionList {
for _, pos := range internal.PositionList {
// Each name carries s twice: hex encoded (%0X) and verbatim (%s).
path := ""
switch pos {
case positionMiddle:
case internal.PositionMiddle:
path = fmt.Sprintf("position-middle-%0X-%s-", s, s)
case positionLeft:
case internal.PositionLeft:
path = fmt.Sprintf("%s-position-left-%0X", s, s)
case positionRight:
case internal.PositionRight:
path = fmt.Sprintf("position-right-%0X-%s", s, s)
default:
panic("invalid position: " + pos.String())
}
_, writeErr := r.writeFile(path)
if writeErr != nil {
fs.Infof(r.f, "Writing %s position file 0x%0X Error: %s", pos.String(), s, writeErr)
_, writeError := r.writeFile(path)
if writeError != nil {
res.WriteError[pos] = writeError.Error()
fs.Infof(r.f, "Writing %s position file 0x%0X Error: %s", pos.String(), s, writeError)
} else {
fs.Infof(r.f, "Writing %s position file 0x%0X OK", pos.String(), s)
}
// The read back is attempted even when the write reported an error.
obj, getErr := r.f.NewObject(r.ctx, path)
if getErr != nil {
res.GetError[pos] = getErr.Error()
fs.Infof(r.f, "Getting %s position file 0x%0X Error: %s", pos.String(), s, getErr)
} else {
// writeFile always uploads 50 characters.
if obj.Size() != 50 {
res.GetError[pos] = fmt.Sprintf("invalid size %d", obj.Size())
fs.Infof(r.f, "Getting %s position file 0x%0X Invalid Size: %d", pos.String(), s, obj.Size())
} else {
fs.Infof(r.f, "Getting %s position file 0x%0X OK", pos.String(), s)
}
}
// NOTE(review): a size mismatch is stored in res.GetError but leaves getErr
// nil, so it does not add pos to positionError - confirm this is intended.
if writeErr != nil || getErr != nil {
if writeError != nil || getErr != nil {
positionError += pos
}
}
// The result maps are only written while holding r.mu.
r.mu.Lock()
r.stringNeedsEscaping[s] = positionError
r.stringNeedsEscaping[k] = positionError
r.controlResults[k] = res
r.mu.Unlock()
}
@ -199,30 +257,97 @@ func (r *results) checkControls() {
s := string(i)
if i == 0 || i == '/' {
// We're not even going to check NULL or /
r.stringNeedsEscaping[s] = positionAll
r.stringNeedsEscaping[s] = internal.PositionAll
continue
}
wg.Add(1)
go func(s string) {
defer wg.Done()
token := <-tokens
r.checkStringPositions(s)
k := s
r.checkStringPositions(k, s)
tokens <- token
}(s)
}
for _, s := range []string{"", "\xBF", "\xFE"} {
for _, s := range []string{"", "\u00A0", "\xBF", "\xFE"} {
wg.Add(1)
go func(s string) {
defer wg.Done()
token := <-tokens
r.checkStringPositions(s)
k := s
r.checkStringPositions(k, s)
tokens <- token
}(s)
}
wg.Wait()
r.checkControlsList()
fs.Infof(r.f, "Done trying to create control character file names")
}
// checkControlsList lists the root of the remote and records in
// r.controlResults how each position test file shows up in the listing.
//
// Every listed base name is matched against the position-left/middle/right
// patterns. The hex digits in the name identify the tested string, the
// literal part shows what the remote actually stored:
//   - literal equals the decoded hex: Present
//   - literal differs: Renamed
//   - a second name maps to the same string and position: Multiple
//
// Names that cannot be attributed to a recorded test are left in the set of
// names and reported as additional files at the end.
func (r *results) checkControlsList() {
	l, err := r.f.List(r.ctx, "")
	if err != nil {
		fs.Errorf(r.f, "Listing control character file names failed: %s", err)
		return
	}

	namesMap := make(map[string]struct{}, len(l))
	for _, s := range l {
		namesMap[path.Base(s.Remote())] = struct{}{}
	}

	// Deleting the current key while ranging over a map is permitted.
	for name := range namesMap {
		var pos internal.Position
		var hex, value string
		if g := positionLeftRe.FindStringSubmatch(name); g != nil {
			pos, hex, value = internal.PositionLeft, g[2], g[1]
		} else if g := positionMiddleRe.FindStringSubmatch(name); g != nil {
			pos, hex, value = internal.PositionMiddle, g[1], g[2]
		} else if g := positionRightRe.FindStringSubmatch(name); g != nil {
			pos, hex, value = internal.PositionRight, g[1], g[2]
		} else {
			fs.Infof(r.f, "Unknown path %q", name)
			continue
		}

		// Decode the hex digits two at a time into the tested bytes.
		hexValue := make([]byte, 0, len(hex)/2)
		valid := true
		for ; len(hex) >= 2; hex = hex[2:] {
			b, err := strconv.ParseUint(hex[:2], 16, 8)
			if err != nil {
				fs.Infof(r.f, "Invalid path %q: %s", name, err)
				valid = false
				break
			}
			hexValue = append(hexValue, byte(b))
		}
		if !valid {
			continue
		}
		if hex != "" {
			// An odd number of hex digits cannot be a name we wrote.
			fs.Infof(r.f, "Invalid path %q", name)
			continue
		}

		k := string(hexValue)
		res, ok := r.controlResults[k]
		if !ok || res.InList == nil {
			// No test was recorded for this string; writing into the
			// missing entry's nil map would panic. Keep the name so it is
			// reported as an additional file below.
			continue
		}
		// res is a copy of the stored struct but shares its InList map.
		switch res.InList[pos] {
		case internal.Absent:
			if k == value {
				res.InList[pos] = internal.Present
			} else {
				res.InList[pos] = internal.Renamed
			}
		case internal.Present, internal.Renamed:
			res.InList[pos] = internal.Multiple
		}
		delete(namesMap, name)
	}

	if len(namesMap) > 0 {
		fs.Infof(r.f, "Found additional control character file names:")
		for name := range namesMap {
			fs.Infof(r.f, "%q", name)
		}
	}
}
// find the max file name size we can use
func (r *results) findMaxLength() {
const maxLen = 16 * 1024
@ -314,37 +439,6 @@ func readInfo(ctx context.Context, f fs.Fs) error {
r.checkStreaming()
}
r.Print()
r.WriteJSON()
return nil
}
// String returns "none" for the empty set, "all" for the full set and
// otherwise the names of the set positions joined by commas in the order
// middle, left, right. It panics if bits other than those three are set.
func (e position) String() string {
	if e == positionNone {
		return "none"
	}
	if e == positionAll {
		return "all"
	}

	var out bytes.Buffer
	rest := e
	for _, part := range []struct {
		flag  position
		label string
	}{
		{positionMiddle, "middle"},
		{positionLeft, "left"},
		{positionRight, "right"},
	} {
		if rest&part.flag == 0 {
			continue
		}
		if out.Len() != 0 {
			out.WriteRune(',')
		}
		out.WriteString(part.label)
		rest &^= part.flag
	}
	if rest != positionNone {
		panic("invalid position")
	}
	return out.String()
}

View file

@ -0,0 +1,158 @@
package main
import (
"encoding/csv"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"os"
"sort"
"strconv"
"github.com/rclone/rclone/cmd/info/internal"
)
// main merges the JSON reports given as arguments (as written by
// "rclone info --write-json") into one CSV table and writes it to the file
// named by -o, or to stdout when -o is "-".
//
// The table has one row per tested string and nine columns per remote:
// write, get and list results, each for the left, middle and right
// position. Reports without control character results are skipped.
func main() {
	fOut := flag.String("o", "out.csv", "Output file")
	flag.Parse()

	args := flag.Args()
	remotes := make([]internal.InfoReport, 0, len(args))
	for _, fn := range args {
		f, err := os.Open(fn)
		if err != nil {
			log.Fatalf("Unable to open %q: %s", fn, err)
		}
		var remote internal.InfoReport
		dec := json.NewDecoder(f)
		err = dec.Decode(&remote)
		if err != nil {
			log.Fatalf("Unable to decode %q: %s", fn, err)
		}
		if remote.ControlCharacters == nil {
			log.Printf("Skipping remote %s: no ControlCharacters", remote.Remote)
		} else {
			remotes = append(remotes, remote)
		}
		if err := f.Close(); err != nil {
			log.Fatalf("Closing %q failed: %s", fn, err)
		}
	}

	// charsMap maps every tested string (raw) to its Go-quoted form without
	// the surrounding quotes, which is what gets printed in the "Char" column.
	charsMap := make(map[string]string)
	remoteNames := make([]string, 0, len(remotes))
	for _, r := range remotes {
		remoteNames = append(remoteNames, r.Remote)
		for k := range *r.ControlCharacters {
			quoted := strconv.Quote(k)
			charsMap[k] = quoted[1 : len(quoted)-1]
		}
	}
	sort.Strings(remoteNames)

	chars := make([]string, 0, len(charsMap))
	for k := range charsMap {
		chars = append(chars, k)
	}
	sort.Strings(chars)

	// raw string -> remote -> result cells
	recordsMap := make(map[string]map[string][]string)
	// remote -> first header row cells
	hRemoteMap := make(map[string][]string)
	hOperation := []string{"Write", "Write", "Write", "Get", "Get", "Get", "List", "List", "List"}
	hPosition := []string{"L", "M", "R", "L", "M", "R", "L", "M", "R"}

	// Header layout per remote:
	//   remote
	//   write             get               list
	//   left middle right left middle right left middle right
	for _, r := range remotes {
		hRemoteMap[r.Remote] = []string{r.Remote, "", "", "", "", "", "", "", ""}
		for k, v := range *r.ControlCharacters {
			cMap, ok := recordsMap[k]
			if !ok {
				cMap = make(map[string][]string, 1)
				recordsMap[k] = cMap
			}

			cMap[r.Remote] = []string{
				sok(v.WriteError[internal.PositionLeft]), sok(v.WriteError[internal.PositionMiddle]), sok(v.WriteError[internal.PositionRight]),
				sok(v.GetError[internal.PositionLeft]), sok(v.GetError[internal.PositionMiddle]), sok(v.GetError[internal.PositionRight]),
				pok(v.InList[internal.PositionLeft]), pok(v.InList[internal.PositionMiddle]), pok(v.InList[internal.PositionRight]),
			}
		}
	}

	records := [][]string{
		{"", ""},
		{"", ""},
		{"Bytes", "Char"},
	}
	for _, r := range remoteNames {
		records[0] = append(records[0], hRemoteMap[r]...)
		records[1] = append(records[1], hOperation...)
		records[2] = append(records[2], hPosition...)
	}
	for _, c := range chars {
		row := []string{fmt.Sprintf("%X", c), charsMap[c]}
		for _, r := range remoteNames {
			// recordsMap is keyed by the raw string, not by its quoted form;
			// looking it up by the quoted form misses every string that
			// needs escaping.
			if m, ok := recordsMap[c][r]; ok {
				row = append(row, m...)
			} else {
				row = append(row, "", "", "", "", "", "", "", "", "")
			}
		}
		records = append(records, row)
	}

	var writer io.Writer
	if *fOut == "-" {
		writer = os.Stdout
	} else {
		f, err := os.Create(*fOut)
		if err != nil {
			log.Fatalf("Unable to create %q: %s", *fOut, err)
		}
		defer func() {
			if err := f.Close(); err != nil {
				log.Fatalln("Error writing csv:", err)
			}
		}()
		writer = f
	}

	w := csv.NewWriter(writer)
	err := w.WriteAll(records)
	if err != nil {
		log.Fatalln("Error writing csv:", err)
	} else if err := w.Error(); err != nil {
		log.Fatalln("Error writing csv:", err)
	}
}
// sok turns an error message into a CSV status cell: "OK" when the message
// is empty, "ERR" otherwise.
func sok(s string) string {
	if s == "" {
		return "OK"
	}
	return "ERR"
}
// pok turns a listing presence into a CSV status cell. Values that are not
// one of the four known states yield "ERR".
func pok(p internal.Presence) string {
	cells := map[internal.Presence]string{
		internal.Absent:   "MIS",
		internal.Present:  "OK",
		internal.Renamed:  "REN",
		internal.Multiple: "MUL",
	}
	if cell, known := cells[p]; known {
		return cell
	}
	return "ERR"
}

View file

@ -0,0 +1,156 @@
package internal
import (
"bytes"
"encoding/json"
"fmt"
"strings"
)
// Presence describes the presence of a filename in file listing
type Presence int
// Possible Presence states
const (
// Absent is the zero value: the name was not seen in the listing.
Absent Presence = iota
// Present means the name was listed with the test string unchanged.
Present
// Renamed means the name was listed with the test string altered.
Renamed
// Multiple means more than one listed name matched the same test.
Multiple
)
// Position is the placement of the test character in the filename
//
// The individual positions are bit flags and can be combined.
type Position int
// Predefined positions
const (
PositionMiddle Position = 1 << iota // 1
PositionLeft // 2
PositionRight // 4
PositionNone Position = 0 // no position
PositionAll Position = PositionRight<<1 - 1 // 7: middle, left and right together
)
// PositionList contains all valid positions
var PositionList = []Position{PositionMiddle, PositionLeft, PositionRight}
// ControlResult contains the result of a single character test
type ControlResult struct {
// Text is the tested string; it is excluded from the JSON output.
Text string `json:"-"`
// WriteError holds the error message of a failed write, per position.
WriteError map[Position]string
// GetError holds the error message of a failed read back, per position.
GetError map[Position]string
// InList records how the file showed up in the listing, per position.
InList map[Position]Presence
}
// InfoReport is the structure of the JSON output
//
// Apart from Remote every field is a pointer; a nil pointer means the
// corresponding result is not part of the report.
type InfoReport struct {
// Remote is the name of the tested remote.
Remote string
// ControlCharacters maps each tested string to its result.
ControlCharacters *map[string]ControlResult
MaxFileLength *int
CanStream *bool
CanWriteUnnormalized *bool
CanReadUnnormalized *bool
CanReadRenormalized *bool
}
// String returns "none" for the empty set, "all" for the full set and
// otherwise the names of the set positions joined by commas in the order
// middle, left, right. It panics if bits other than those three are set.
func (e Position) String() string {
	if e == PositionNone {
		return "none"
	}
	if e == PositionAll {
		return "all"
	}

	var out bytes.Buffer
	rest := e
	for _, part := range []struct {
		flag  Position
		label string
	}{
		{PositionMiddle, "middle"},
		{PositionLeft, "left"},
		{PositionRight, "right"},
	} {
		if rest&part.flag == 0 {
			continue
		}
		if out.Len() != 0 {
			out.WriteRune(',')
		}
		out.WriteString(part.label)
		rest &^= part.flag
	}
	if rest != PositionNone {
		panic("invalid position")
	}
	return out.String()
}
// MarshalText returns the String form of the position as bytes, which is
// what gets used when a Position is a map key. It never returns an error.
func (e Position) MarshalText() ([]byte, error) {
	text := e.String()
	return []byte(text), nil
}
// UnmarshalText decodes a position when used as a map key.
//
// The text is matched case-insensitively and must be "none", "all" or a
// comma separated list of "left", "middle" and "right". On error the
// receiver is left unchanged and the returned error names the element that
// was not recognised.
func (e *Position) UnmarshalText(text []byte) error {
	switch s := strings.ToLower(string(text)); s {
	case "none":
		*e = PositionNone
	case "all":
		*e = PositionAll
	default:
		// Accumulate into a local so that a bad element does not leave a
		// half decoded value behind.
		pos := PositionNone
		for _, p := range strings.Split(s, ",") {
			switch p {
			case "left":
				pos |= PositionLeft
			case "middle":
				pos |= PositionMiddle
			case "right":
				pos |= PositionRight
			default:
				// Report the offending element, not the receiver.
				return fmt.Errorf("unknown position: %q", p)
			}
		}
		*e = pos
	}
	return nil
}
// String returns the lower case name of the presence state. It panics for
// values that are not one of the four defined states.
func (e Presence) String() string {
	names := map[Presence]string{
		Absent:   "absent",
		Present:  "present",
		Renamed:  "renamed",
		Multiple: "multiple",
	}
	name, known := names[e]
	if !known {
		panic("invalid presence")
	}
	return name
}
// MarshalJSON encodes the presence as the JSON string of its String form.
func (e Presence) MarshalJSON() ([]byte, error) {
	name := e.String()
	return json.Marshal(name)
}
// UnmarshalJSON decodes a presence when used as a JSON value.
//
// The value must be a JSON string naming one of the states; the name is
// matched case-insensitively. On error the receiver is left unchanged and
// the returned error quotes the text that was not recognised.
func (e *Presence) UnmarshalJSON(text []byte) error {
	var s string
	if err := json.Unmarshal(text, &s); err != nil {
		return err
	}
	switch strings.ToLower(s) {
	case "absent":
		*e = Absent
	case "present":
		*e = Present
	case "renamed":
		*e = Renamed
	case "multiple":
		*e = Multiple
	default:
		// Report the decoded text; the receiver still holds its old value
		// and formatting it would name the wrong thing.
		return fmt.Errorf("unknown presence: %q", s)
	}
	return nil
}

View file

@ -1,40 +0,0 @@
# Turn the info-*.log and info-*.list files in the current directory into
# CSV tables and paste them together into info-complete.csv.
# NOTE(review): the nested ${${...}} expansions are zsh syntax - confirm the
# script is run with zsh.
set -euo pipefail
# Per log file: pull the result word of every "Writing"/"Getting" position
# line into one file per operation and position, then paste them side by
# side below a three line header.
for f in info-*.log; do
for pos in middle left right; do
egrep -oe " Writing $pos position file [^ ]* \w+" $f | sort | cut -d' ' -f 7 > $f.write_$pos
egrep -oe " Getting $pos position file [^ ]* \w+" $f | sort | cut -d' ' -f 7 > $f.get_$pos
done
{
# ${${f%.log}#info-} strips the "info-" prefix and ".log" suffix from $f.
echo "${${f%.log}#info-}\t${${f%.log}#info-}\t${${f%.log}#info-}\t${${f%.log}#info-}\t${${f%.log}#info-}\t${${f%.log}#info-}"
echo "Write\tWrite\tWrite\tGet\tGet\tGet"
echo "Mid\tLeft\tRight\tMid\tLeft\tRight"
paste $f.write_{middle,left,right} $f.get_{middle,left,right}
} > $f.csv
done
# Per listing file: split the listed names by position, then print one row
# per hex code with the name found for it, or "Miss" when there is none.
for f in info-*.list; do
for pos in middle left right; do
cat $f | perl -lne 'print $1 if /^\s+[0-9]+\s+(.*)/' | grep -a "position-$pos-" | sort > $f.$pos
done
{
echo "${${f%.list}#info-}\t${${f%.list}#info-}\t${${f%.list}#info-}"
echo "List\tList\tList"
echo "Mid\tLeft\tRight"
# The list runs from 01 to 7F without 2F, followed by BF, EFBCBC and FE.
for e in 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F BF EFBCBC FE; do
echo -n $(perl -lne 'print "'$e'-$1" if /^position-middle-'$e'-(.*)-/' $f.middle | tr -d "\t\r" | grep -a . || echo Miss)
echo -n "\t"
echo -n $(perl -lne 'print "'$e'-$1" if /^(.*)-position-left-'$e'/' $f.left | tr -d "\t\r" | grep -a . || echo Miss)
echo -n "\t"
echo $(perl -lne 'print "'$e'-$1" if /^position-right-'$e'-(.*)/' $f.right | tr -d "\t\r" | grep -a . || echo Miss)
# echo -n $(grep -a "position-middle-$e-" $f.middle | tr -d "\t\r" || echo Miss)"\t"
# echo -n $(grep -a "position-left-$e" $f.left | tr -d "\t\r" || echo Miss)"\t"
# echo $(grep -a "position-right-$e-" $f.right | tr -d "\t\r" || echo Miss)
done
} > $f.csv
done
# Join the log and listing tables of each remote, then all remotes.
for f in info-*.list; do
paste ${f%.list}.log.csv $f.csv > ${f%.list}.full.csv
done
paste *.full.csv > info-complete.csv

View file

@ -1,3 +1,4 @@
rem Purge the test directory, run "rclone info" on it with debug logging
rem and save a listing of what it left behind.
rclone.exe purge info
rclone.exe info -vv info > info-LocalWindows.log 2>&1
rclone.exe ls -vv info > info-LocalWindows.list 2>&1
rem RCLONE_CONFIG_LOCALWINDOWS_TYPE declares a remote "LocalWindows" of type local.
set RCLONE_CONFIG_LOCALWINDOWS_TYPE=local
rclone.exe purge LocalWindows:info
rem --write-json additionally stores the results in info-LocalWindows.json.
rclone.exe info -vv LocalWindows:info --write-json=info-LocalWindows.json > info-LocalWindows.log 2>&1
rclone.exe ls -vv LocalWindows:info > info-LocalWindows.list 2>&1

View file

@ -7,17 +7,19 @@
export PATH=$GOPATH/src/github.com/rclone/rclone:$PATH
typeset -A allRemotes
allRemotes=(
TestAmazonCloudDrive '--low-level-retries=2 --checkers=5'
allRemotes=(
TestAmazonCloudDrive '--low-level-retries=2 --checkers=5 --upload-wait=5s'
TestB2 ''
TestBox ''
TestDrive '--tpslimit=5'
TestCrypt ''
TestDropbox '--checkers=1'
TestGCS ''
TestJottacloud ''
TestKoofr ''
TestMega ''
TestOneDrive ''
TestOpenDrive '--low-level-retries=2 --checkers=5'
TestOpenDrive '--low-level-retries=4 --checkers=5'
TestPcloud '--low-level-retries=2 --timeout=15s'
TestS3 ''
Local ''
@ -33,11 +35,18 @@ elif [[ $1 = --list ]]; then
fi
# For every remote given as an argument: pick the test directory, purge it,
# run "rclone info" on it and save a listing of what it left behind.
for remote; do
dir=$remote:infotest
if [[ $remote = Local ]]; then
dir=infotest
fi
case $remote in
Local)
# Declare a remote named Local<uname> of type local through the environment.
l=Local$(uname)
export RCLONE_CONFIG_${l:u}_TYPE=local
dir=$l:infotest;;
TestGCS)
# The GCS test directory lives inside the bucket named by $GCS_BUCKET.
dir=$remote:$GCS_BUCKET/infotest;;
*)
dir=$remote:infotest;;
esac
# A failing purge is ignored.
rclone purge $dir || :
# ${=...} word-splits the per-remote extra flags stored in allRemotes.
rclone info -vv $dir ${=allRemotes[$remote]} &> info-$remote.log
rclone info -vv $dir --write-json=info-$remote.json ${=allRemotes[$remote]:-} &> info-$remote.log
rclone ls -vv $dir &> info-$remote.list
done