From 5e229d84d424745693ddfa889ec65dd3c12f47e4 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Fri, 21 Feb 2020 11:50:03 +0300 Subject: [PATCH 1/2] compiler: use uint16 for label numbers As noted in #687 this will make the compiler a bit more predictable. --- pkg/compiler/codegen.go | 50 +++++++++++++++++++++----------------- pkg/compiler/func_scope.go | 4 +-- pkg/vm/emit/emit.go | 6 ++--- 3 files changed, 33 insertions(+), 27 deletions(-) diff --git a/pkg/compiler/codegen.go b/pkg/compiler/codegen.go index cdb54a938..5126ee8f9 100644 --- a/pkg/compiler/codegen.go +++ b/pkg/compiler/codegen.go @@ -8,6 +8,7 @@ import ( "go/constant" "go/token" "go/types" + "math" "sort" "strconv" "strings" @@ -39,7 +40,7 @@ type codegen struct { scope *funcScope // A mapping from label's names to their ids. - labels map[labelWithType]int + labels map[labelWithType]uint16 // A label for the for-loop being currently visited. currentFor string @@ -66,21 +67,26 @@ type labelWithType struct { } // newLabel creates a new label to jump to -func (c *codegen) newLabel() (l int) { - l = len(c.l) +func (c *codegen) newLabel() (l uint16) { + li := len(c.l) + if li > math.MaxUint16 { + c.prog.Err = errors.New("label number is too big") + return + } + l = uint16(li) c.l = append(c.l, -1) return } // newNamedLabel creates a new label with a specified name. 
-func (c *codegen) newNamedLabel(typ labelOffsetType, name string) (l int) { +func (c *codegen) newNamedLabel(typ labelOffsetType, name string) (l uint16) { l = c.newLabel() lt := labelWithType{name: name, typ: typ} c.labels[lt] = l return } -func (c *codegen) setLabel(l int) { +func (c *codegen) setLabel(l uint16) { c.l[l] = c.pc() + 1 } @@ -384,13 +390,13 @@ func (c *codegen) Visit(node ast.Node) ast.Visitor { if n.Cond != nil { ast.Walk(c, n.Cond) - emit.Jmp(c.prog.BinWriter, opcode.JMPIFNOT, int16(lElse)) + emit.Jmp(c.prog.BinWriter, opcode.JMPIFNOT, lElse) } c.setLabel(lIf) ast.Walk(c, n.Body) if n.Else != nil { - emit.Jmp(c.prog.BinWriter, opcode.JMP, int16(lElseEnd)) + emit.Jmp(c.prog.BinWriter, opcode.JMP, lElseEnd) } c.setLabel(lElse) @@ -421,9 +427,9 @@ func (c *codegen) Visit(node ast.Node) ast.Visitor { ast.Walk(c, cc.List[j]) emit.Opcode(c.prog.BinWriter, eqOpcode) if j == l-1 { - emit.Jmp(c.prog.BinWriter, opcode.JMPIFNOT, int16(lEnd)) + emit.Jmp(c.prog.BinWriter, opcode.JMPIFNOT, lEnd) } else { - emit.Jmp(c.prog.BinWriter, opcode.JMPIF, int16(lStart)) + emit.Jmp(c.prog.BinWriter, opcode.JMPIF, lStart) } } } @@ -432,7 +438,7 @@ func (c *codegen) Visit(node ast.Node) ast.Visitor { for _, stmt := range cc.Body { ast.Walk(c, stmt) } - emit.Jmp(c.prog.BinWriter, opcode.JMP, int16(switchEnd)) + emit.Jmp(c.prog.BinWriter, opcode.JMP, switchEnd) c.setLabel(lEnd) } @@ -500,13 +506,13 @@ func (c *codegen) Visit(node ast.Node) ast.Visitor { switch n.Op { case token.LAND: ast.Walk(c, n.X) - emit.Jmp(c.prog.BinWriter, opcode.JMPIFNOT, int16(len(c.l)-1)) + emit.Jmp(c.prog.BinWriter, opcode.JMPIFNOT, uint16(len(c.l)-1)) ast.Walk(c, n.Y) return nil case token.LOR: ast.Walk(c, n.X) - emit.Jmp(c.prog.BinWriter, opcode.JMPIF, int16(len(c.l)-3)) + emit.Jmp(c.prog.BinWriter, opcode.JMPIF, uint16(len(c.l)-3)) ast.Walk(c, n.Y) return nil @@ -612,7 +618,7 @@ func (c *codegen) Visit(node ast.Node) ast.Visitor { case isSyscall(f): c.convertSyscall(f.selector.Name, f.name) 
default: - emit.Call(c.prog.BinWriter, opcode.CALL, int16(f.label)) + emit.Call(c.prog.BinWriter, opcode.CALL, f.label) } return nil @@ -702,10 +708,10 @@ func (c *codegen) Visit(node ast.Node) ast.Visitor { switch n.Tok { case token.BREAK: end := c.getLabelOffset(labelEnd, label) - emit.Jmp(c.prog.BinWriter, opcode.JMP, int16(end)) + emit.Jmp(c.prog.BinWriter, opcode.JMP, end) case token.CONTINUE: post := c.getLabelOffset(labelPost, label) - emit.Jmp(c.prog.BinWriter, opcode.JMP, int16(post)) + emit.Jmp(c.prog.BinWriter, opcode.JMP, post) } return nil @@ -737,7 +743,7 @@ func (c *codegen) Visit(node ast.Node) ast.Visitor { ast.Walk(c, n.Cond) // Jump if the condition is false - emit.Jmp(c.prog.BinWriter, opcode.JMPIFNOT, int16(fend)) + emit.Jmp(c.prog.BinWriter, opcode.JMPIFNOT, fend) // Walk body followed by the iterator (post stmt). ast.Walk(c, n.Body) @@ -747,7 +753,7 @@ func (c *codegen) Visit(node ast.Node) ast.Visitor { } // Jump back to condition. - emit.Jmp(c.prog.BinWriter, opcode.JMP, int16(fstart)) + emit.Jmp(c.prog.BinWriter, opcode.JMP, fstart) c.setLabel(fend) c.currentFor = lastLabel @@ -782,7 +788,7 @@ func (c *codegen) Visit(node ast.Node) ast.Visitor { emit.Opcode(c.prog.BinWriter, opcode.OVER) emit.Opcode(c.prog.BinWriter, opcode.OVER) emit.Opcode(c.prog.BinWriter, opcode.LTE) // finish if len <= i - emit.Jmp(c.prog.BinWriter, opcode.JMPIF, int16(end)) + emit.Jmp(c.prog.BinWriter, opcode.JMPIF, end) if n.Key != nil { emit.Opcode(c.prog.BinWriter, opcode.DUP) @@ -796,7 +802,7 @@ func (c *codegen) Visit(node ast.Node) ast.Visitor { c.setLabel(post) emit.Opcode(c.prog.BinWriter, opcode.INC) - emit.Jmp(c.prog.BinWriter, opcode.JMP, int16(start)) + emit.Jmp(c.prog.BinWriter, opcode.JMP, start) c.setLabel(end) @@ -833,7 +839,7 @@ func (c *codegen) emitReverse(num int) { } // generateLabel returns a new label. 
-func (c *codegen) generateLabel(typ labelOffsetType) (int, string) { +func (c *codegen) generateLabel(typ labelOffsetType) (uint16, string) { name := c.nextLabel if name == "" { name = fmt.Sprintf("@%d", len(c.l)) @@ -843,7 +849,7 @@ func (c *codegen) generateLabel(typ labelOffsetType) (int, string) { return c.newNamedLabel(typ, name), name } -func (c *codegen) getLabelOffset(typ labelOffsetType, name string) int { +func (c *codegen) getLabelOffset(typ labelOffsetType, name string) uint16 { return c.labels[labelWithType{name: name, typ: typ}] } @@ -1144,7 +1150,7 @@ func CodeGen(info *buildInfo) ([]byte, error) { prog: io.NewBufBinWriter(), l: []int{}, funcs: map[string]*funcScope{}, - labels: map[labelWithType]int{}, + labels: map[labelWithType]uint16{}, typeInfo: &pkg.Info, } diff --git a/pkg/compiler/func_scope.go b/pkg/compiler/func_scope.go index f88903861..787207e11 100644 --- a/pkg/compiler/func_scope.go +++ b/pkg/compiler/func_scope.go @@ -18,7 +18,7 @@ type funcScope struct { decl *ast.FuncDecl // Program label of the scope - label int + label uint16 // Local variables locals map[string]int @@ -35,7 +35,7 @@ type funcScope struct { i int } -func newFuncScope(decl *ast.FuncDecl, label int) *funcScope { +func newFuncScope(decl *ast.FuncDecl, label uint16) *funcScope { return &funcScope{ name: decl.Name.Name, decl: decl, diff --git a/pkg/vm/emit/emit.go b/pkg/vm/emit/emit.go index c458d07f9..5bba67a9b 100644 --- a/pkg/vm/emit/emit.go +++ b/pkg/vm/emit/emit.go @@ -91,12 +91,12 @@ func Syscall(w *io.BinWriter, api string) { } // Call emits a call Instruction with label to the given buffer. -func Call(w *io.BinWriter, op opcode.Opcode, label int16) { +func Call(w *io.BinWriter, op opcode.Opcode, label uint16) { Jmp(w, op, label) } // Jmp emits a jump Instruction along with label to the given buffer. 
-func Jmp(w *io.BinWriter, op opcode.Opcode, label int16) { +func Jmp(w *io.BinWriter, op opcode.Opcode, label uint16) { if w.Err != nil { return } else if !isInstructionJmp(op) { @@ -104,7 +104,7 @@ func Jmp(w *io.BinWriter, op opcode.Opcode, label int16) { return } buf := make([]byte, 2) - binary.LittleEndian.PutUint16(buf, uint16(label)) + binary.LittleEndian.PutUint16(buf, label) Instruction(w, op, buf) } From 177b725dc17854def0c3dd38c3d3996973b66a5f Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Fri, 21 Feb 2020 11:57:24 +0300 Subject: [PATCH 2/2] compiler: make writeJumps return error for bad jumps The script is invalid anyway so it is better to notify the user. --- pkg/compiler/codegen.go | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/pkg/compiler/codegen.go b/pkg/compiler/codegen.go index 5126ee8f9..f7f56e382 100644 --- a/pkg/compiler/codegen.go +++ b/pkg/compiler/codegen.go @@ -1203,7 +1203,9 @@ func CodeGen(info *buildInfo) ([]byte, error) { return nil, c.prog.Err } buf := c.prog.Bytes() - c.writeJumps(buf) + if err := c.writeJumps(buf); err != nil { + return nil, err + } return buf, nil } @@ -1218,20 +1220,30 @@ func (c *codegen) resolveFuncDecls(f *ast.File) { } } -func (c *codegen) writeJumps(b []byte) { +// writeJumps replaces label numbers in the arguments of JMP, JMPIFNOT, JMPIF +// and CALL instructions in b with relative offsets taken from c.l. It returns +// an error if a label number is unknown or an offset does not fit into int16. +func (c *codegen) writeJumps(b []byte) error { ctx := vm.NewContext(b) for op, _, err := ctx.Next(); err == nil && ctx.NextIP() < len(b); op, _, err = ctx.Next() { switch op { case opcode.JMP, opcode.JMPIFNOT, opcode.JMPIF, opcode.CALL: // we can't use arg returned by ctx.Next() because it is copied - arg := b[ctx.NextIP()-2:] + nextIP := ctx.NextIP() + arg := b[nextIP-2:] - index := int16(binary.LittleEndian.Uint16(arg)) - if int(index) > len(c.l) || int(index) < 0 { - continue + index := binary.LittleEndian.Uint16(arg) + if int(index) >= len(c.l) { + return fmt.Errorf("unexpected label number: %d (max %d)", index, len(c.l)-1) } - offset := uint16(c.l[index] - ctx.NextIP() + 3) - 
binary.LittleEndian.PutUint16(arg, offset) + offset := c.l[index] - nextIP + 3 + // Offsets are signed (loops jump backwards), so they must fit into int16. + if offset > math.MaxInt16 || offset < math.MinInt16 { + return fmt.Errorf("label offset is too big at the instruction %d: %d (max %d, min %d)", + nextIP-3, offset, math.MaxInt16, math.MinInt16) + } + binary.LittleEndian.PutUint16(arg, uint16(offset)) } } + return nil }