[#3] policy: use ANTLRv4 parser generator

Signed-off-by: Evgenii Stratonikov <evgeniy@nspcc.ru>
This commit is contained in:
Evgenii Stratonikov 2021-05-21 18:12:32 +03:00
parent 86a9aa9259
commit 3aeeafe79e
21 changed files with 372 additions and 238 deletions

4
.gitattributes vendored Normal file
View file

@ -0,0 +1,4 @@
/pkg/policy/parser/*.go -diff
/pkg/policy/parser/generate.go diff
**/*.interp -diff
**/*.tokens -diff

2
go.mod
View file

@ -3,7 +3,7 @@ module github.com/nspcc-dev/neofs-sdk-go
go 1.16
require (
github.com/alecthomas/participle v0.7.1
github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20210521073959-f0d4d129b7f1
github.com/nspcc-dev/neofs-api-go v1.27.0
github.com/stretchr/testify v1.6.1
go.uber.org/zap v1.10.0

BIN
go.sum

Binary file not shown.

View file

@ -1,5 +1,5 @@
// Package policy provides facilities for creating policy from SQL-like language.
// eBNF grammar is provided in `grammar.ebnf` for illustration.
// ANTLRv4 grammar is provided in `parser/Query.g4` and `parser/QueryLexer.g4`.
//
// Current limitations:
// 1. Grouping filter expressions in parentheses is not supported right now.

View file

@ -1,55 +0,0 @@
Policy ::=
RepStmt, [RepStmt],
CbtStmt?,
[SelectStmt],
[FilterStmt],
;
RepStmt ::=
'REP', Number1, (* number of object replicas *)
('IN', Ident)? (* optional selector name *)
;
CbtStmt ::= 'CBF', Number1 (* container backup factor *)
;
SelectStmt ::=
'SELECT', Number1, (* number of nodes to select without container backup factor *)
('IN', Clause?, Ident)?, (* bucket name *)
'FROM', (Ident | '*'), (* filter reference or whole netmap *)
('AS', Ident)? (* optional selector name *)
;
Clause ::=
'SAME' (* nodes from the same bucket *)
| 'DISTINCT' (* nodes from distinct buckets *)
;
FilterStmt ::=
'FILTER', AndChain, ['OR', AndChain],
'AS', Ident (* obligatory filter name *)
;
AndChain ::=
Expr, ['AND', Expr]
;
Expr ::=
'@' Ident (* filter reference *)
| Key, Op, Value (* attribute filter *)
;
Op ::= 'EQ' | 'NE' | 'GE' | 'GT' | 'LT' | 'LE'
;
Key ::= Ident | String
;
Value ::= Ident | Number | String
;
Number1 ::= Digit1 [Digit];
Number ::= Digit [Digit];
Digit1 ::= '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' ;
Digit ::= '0' | Digit1;

View file

@ -1,60 +0,0 @@
//nolint:govet,golint // fails on struct tags here, but participle needs this syntax
package policy
import (
"github.com/alecthomas/participle"
)
// parser is the package-level participle parser for the policy language.
// It is assigned exactly once, in init.
var parser *participle.Parser

// init compiles the grammar described by the struct tags of query and stores
// the resulting parser. It panics if the grammar cannot be built.
func init() {
	var buildErr error
	if parser, buildErr = participle.Build(&query{}); buildErr != nil {
		panic(buildErr)
	}
}
// query is the root node of a parsed placement policy. The struct tags are
// the grammar handed to participle.Build, so both the tags and the field
// order are significant.
type query struct {
// Replicas collects the REP statements.
Replicas []*replicaStmt `@@+`
// CBF is the integer captured after the optional "CBF" keyword.
CBF uint32 `("CBF" @Int)?`
// Selectors collects the SELECT statements.
Selectors []*selectorStmt `@@*`
// Filters collects the FILTER statements.
Filters []*filterStmt `@@*`
}
// replicaStmt is a single REP statement.
type replicaStmt struct {
// Count is the integer captured after the "REP" keyword.
Count int `"REP" @Int`
// Selector is the identifier captured after the optional "IN" keyword;
// it stays empty when the "IN" part is absent.
Selector string `("IN" @Ident)?`
}
// selectorStmt is a single SELECT statement.
type selectorStmt struct {
// Count is the integer captured after the "SELECT" keyword.
Count uint32 `"SELECT" @Int`
// Bucket holds the tokens captured from the optional "IN" part: either just
// the bucket identifier, or a SAME/DISTINCT clause followed by the identifier.
Bucket []string `("IN" @(("SAME" | "DISTINCT")? Ident))?`
// Filter is the identifier or "*" captured after the "FROM" keyword.
Filter string `"FROM" @(Ident | "*")`
// Name is the identifier captured after the optional "AS" keyword.
Name string `("AS" @Ident)?`
}
// filterStmt is a single FILTER statement: an expression followed by a
// mandatory name.
type filterStmt struct {
// Value is the filter expression following the "FILTER" keyword.
Value *orChain `"FILTER" @@`
// Name is the identifier captured after the "AS" keyword.
Name string `"AS" @Ident`
}
// filterOrExpr is one operand of an AND chain: either a reference to a named
// filter or a simple attribute comparison.
type filterOrExpr struct {
// Reference is the identifier captured after "@".
Reference string `"@"@Ident`
// Expr is the alternative to Reference: a key/operator/value comparison.
Expr *simpleExpr `| @@`
}
// orChain is a sequence of AND chains separated by the "OR" keyword.
type orChain struct {
// Clauses holds the AND chains in the order they appear.
Clauses []*andChain `@@ ("OR" @@)*`
}
// andChain is a sequence of filter operands separated by the "AND" keyword.
type andChain struct {
// Clauses holds the operands in the order they appear.
Clauses []*filterOrExpr `@@ ("AND" @@)*`
}
// simpleExpr is an attribute comparison of the form `Key Op Value`.
type simpleExpr struct {
// Key is the attribute name, given as an identifier or a string.
Key string `@(Ident | String)`
// We don't use literals here to improve error messages.
// Op is therefore captured as a plain identifier rather than matched
// against a fixed list of operator keywords in the grammar.
Op string `@Ident`
// Value is the right-hand side: an identifier, a string or an integer,
// always stored as text.
Value string `@(Ident | String | Int)`
}

View file

@ -0,0 +1,45 @@
// Parser grammar for the placement policy language.
// The Go parser is generated from this file (see the go:generate directive in
// generate.go); regenerate it after every change here.
parser grammar Query;

options {
    tokenVocab = QueryLexer;
}

// Entry rule. The trailing EOF forces the parser to consume the whole input,
// so tokens left over after the last statement are reported as a syntax error
// instead of being silently ignored.
policy: repStmt+ cbfStmt? selectStmt* filterStmt* EOF;

repStmt:
    REP Count = NUMBER1     // number of object replicas
    (IN Selector = ident)?; // optional selector name

cbfStmt: CBF BackupFactor = NUMBER1; // container backup factor

selectStmt:
    SELECT Count = NUMBER1       // number of nodes to select without container backup factor
    (IN clause? Bucket = ident)? // bucket name
    FROM Filter = identWC        // filter reference or whole netmap
    (AS Name = ident)?           // optional selector name
    ;

clause: CLAUSE_SAME | CLAUSE_DISTINCT; // nodes from the same bucket | nodes from distinct buckets

// Left-recursive rule. The AND alternative is listed before the OR one, which
// in ANTLR4 gives AND the higher precedence.
filterExpr:
    F1 = filterExpr Op = AND_OP F2 = filterExpr
    | F1 = filterExpr Op = OR_OP F2 = filterExpr
    | expr
    ;

filterStmt:
    FILTER Expr = filterExpr
    AS Name = ident // obligatory filter name
    ;

expr:
    AT Filter = ident                               // reference to named filter
    | Key = filterKey SIMPLE_OP Value = filterValue // attribute comparison
    ;

filterKey : ident | STRING;
filterValue : ident | number | STRING;
number : ZERO | NUMBER1;

// Keywords listed here may also be used wherever an identifier is expected.
keyword : REP | IN | AS | SELECT | FROM | FILTER;
ident : keyword | IDENT;
identWC : ident | WILDCARD;

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,41 @@
// Lexer grammar for the placement policy language; Query.g4 uses it as its
// token vocabulary.
lexer grammar QueryLexer;
// Operators and keywords.
// NOTE(review): these rules are declared before IDENT; ANTLR prefers the
// earlier rule when two rules match text of the same length, so the order
// presumably matters. Confirm before reordering.
AND_OP : 'AND';
OR_OP : 'OR';
SIMPLE_OP : 'EQ' | 'NE' | 'GE' | 'GT' | 'LT' | 'LE';
REP : 'REP';
IN : 'IN';
AS : 'AS';
CBF : 'CBF';
SELECT : 'SELECT';
FROM : 'FROM';
FILTER : 'FILTER';
WILDCARD : '*';
CLAUSE_SAME : 'SAME';
CLAUSE_DISTINCT : 'DISTINCT';
// Parenthesis tokens are declared here but no rule of Query.g4 refers to them.
L_PAREN : '(';
R_PAREN : ')';
AT : '@';
// Identifier: a letter or underscore followed by letters, digits or underscores.
IDENT : Nondigit (Digit | Nondigit)* ;
fragment Digit : [0-9] ;
fragment Nondigit : [a-zA-Z_] ;
// NUMBER1 is a positive integer without a leading zero; a lone '0' is the
// separate ZERO token.
NUMBER1 : [1-9] Digit* ;
ZERO : '0' ;
// Taken from antlr4 json grammar with minor corrections.
// https://github.com/antlr/grammars-v4/blob/master/json/JSON.g4
// A string is delimited by either double or single quotes. The token text
// includes the delimiters.
STRING : '"' (ESC | SAFECODEPOINTDOUBLE)* '"'
| '\'' (ESC | SAFECODEPOINTSINGLE)* '\'' ;
fragment ESC : '\\' (['"\\/bfnrt] | UNICODE) ;
fragment UNICODE : 'u' HEX HEX HEX HEX ;
fragment HEX : [0-9a-fA-F] ;
// Any character except the matching quote, a backslash and U+0000..U+001F.
fragment SAFECODEPOINTSINGLE : ~ ['\\\u0000-\u001F] ;
fragment SAFECODEPOINTDOUBLE : ~ ["\\\u0000-\u001F] ;
// Whitespace separates tokens and is otherwise discarded.
WS : [ \t\n\r] + -> skip ;

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,3 @@
package parser
//go:generate antlr4 -Dlanguage=Go -visitor QueryLexer.g4 Query.g4

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -3,9 +3,12 @@ package policy
import (
"errors"
"fmt"
"strconv"
"strings"
"github.com/antlr/antlr4/runtime/Go/antlr"
"github.com/nspcc-dev/neofs-api-go/pkg/netmap"
"github.com/nspcc-dev/neofs-sdk-go/pkg/policy/parser"
)
var (
@ -19,83 +22,252 @@ var (
ErrUnknownFilter = errors.New("policy: filter not found")
// ErrUnknownSelector is returned when a value of IN is unknown.
ErrUnknownSelector = errors.New("policy: selector not found")
// ErrSyntaxError is returned for errors found by ANTLR parser.
ErrSyntaxError = errors.New("policy: syntax error")
)
func parse(s string) (*query, error) {
q := new(query)
err := parser.Parse(strings.NewReader(s), q)
if err != nil {
return nil, err
}
return q, nil
// policyVisitor walks the ANTLR parse tree to build netmap structures and is
// also registered as the parser's error listener, so syntax errors and
// conversion errors end up in the same list.
type policyVisitor struct {
// errors holds every reported error in the order it was reported.
errors []error
// NOTE(review): presumably provides default implementations for the visit
// methods this type does not define itself; confirm against the generated code.
parser.BaseQueryVisitor
// Embedded so that only SyntaxError needs to be defined on this type.
antlr.DefaultErrorListener
}
// Parse parses s into a placement policy.
func Parse(s string) (*netmap.PlacementPolicy, error) {
q, err := parse(s)
if err != nil {
return parse(s)
}
// newPolicyVisitor returns an empty visitor with no recorded errors.
func newPolicyVisitor() *policyVisitor {
	return new(policyVisitor)
}
// parse builds a placement policy from its textual representation s.
//
// The input is tokenized and parsed by the ANTLR-generated lexer and parser,
// the parse tree is converted by a policyVisitor, and the result is checked
// with validatePolicy. The first error recorded while lexing, parsing or
// visiting is returned; errors reported by ANTLR wrap ErrSyntaxError.
func parse(s string) (*netmap.PlacementPolicy, error) {
	v := newPolicyVisitor()

	input := antlr.NewInputStream(s)
	lexer := parser.NewQueryLexer(input)
	// Route lexer diagnostics (e.g. unrecognized characters) to the visitor too.
	// With the default listener they are only written to the console and never
	// reach the caller as an error.
	lexer.RemoveErrorListeners()
	lexer.AddErrorListener(v)
	stream := antlr.NewCommonTokenStream(lexer, 0)

	p := parser.NewQuery(stream)
	p.BuildParseTrees = true
	p.RemoveErrorListeners()
	p.AddErrorListener(v)

	res := p.Policy().Accept(v)
	if len(v.errors) != 0 {
		return nil, v.errors[0]
	}

	pl, ok := res.(*netmap.PlacementPolicy)
	if !ok {
		// The visitor is expected to either record an error or return a policy.
		// Fail with an error rather than a panic if that ever stops being true.
		return nil, fmt.Errorf("%w: no policy produced", ErrSyntaxError)
	}

	if err := validatePolicy(pl); err != nil {
		return nil, err
	}
	return pl, nil
}
seenFilters := map[string]bool{}
fs := make([]*netmap.Filter, 0, len(q.Filters))
for _, qf := range q.Filters {
f, err := filterFromOrChain(qf.Value, seenFilters)
if err != nil {
return nil, err
// SyntaxError overrides the method of the embedded antlr.DefaultErrorListener.
// It records the reported problem as an error that wraps ErrSyntaxError and
// carries the line, column and ANTLR's message.
func (p *policyVisitor) SyntaxError(recognizer antlr.Recognizer, offendingSymbol interface{}, line, column int, msg string, e antlr.RecognitionException) {
	syntaxErr := fmt.Errorf("%w: line %d:%d %s", ErrSyntaxError, line, column, msg)
	p.reportError(syntaxErr)
}
// reportError appends err to the visitor's error list. It always returns nil
// so that visit methods can end with `return p.reportError(err)`.
func (p *policyVisitor) reportError(err error) interface{} {
	recorded := append(p.errors, err)
	p.errors = recorded
	return nil
}
// VisitPolicy implements parser.QueryVisitor interface.
//
// It assembles a *netmap.PlacementPolicy from the REP, CBF, SELECT and FILTER
// statements of the parse tree, visiting them in that order. It returns nil
// when errors were already recorded before the visit started or when visiting
// a REP, CBF or SELECT statement does not yield the expected type.
func (p *policyVisitor) VisitPolicy(ctx *parser.PolicyContext) interface{} {
// Do not walk a tree for which errors have already been reported.
if len(p.errors) != 0 {
return nil
}
pl := new(netmap.PlacementPolicy)
repStmts := ctx.AllRepStmt()
rs := make([]*netmap.Replica, 0, len(repStmts))
for _, r := range repStmts {
res, ok := r.Accept(p).(*netmap.Replica)
if !ok {
return nil
}
// NOTE(review): the next three lines reference f and qf, which are not
// declared in this function. They look like leftovers from the previous
// implementation; confirm and remove.
f.SetName(qf.Name)
fs = append(fs, f)
seenFilters[qf.Name] = true
rs = append(rs, res)
}
pl.SetReplicas(rs...)
// The CBF statement is optional; the backup factor is only set when present.
if cbfStmt := ctx.CbfStmt(); cbfStmt != nil {
cbf, ok := cbfStmt.(*parser.CbfStmtContext).Accept(p).(uint32)
if !ok {
return nil
}
pl.SetContainerBackupFactor(cbf)
}
selStmts := ctx.AllSelectStmt()
ss := make([]*netmap.Selector, 0, len(selStmts))
for _, s := range selStmts {
res, ok := s.Accept(p).(*netmap.Selector)
if !ok {
return nil
}
ss = append(ss, res)
}
pl.SetSelectors(ss...)
filtStmts := ctx.AllFilterStmt()
fs := make([]*netmap.Filter, 0, len(filtStmts))
for _, f := range filtStmts {
// NOTE(review): unlike the loops above, this type assertion is unchecked and
// panics if the visit does not yield a *netmap.Filter.
fs = append(fs, f.Accept(p).(*netmap.Filter))
}
pl.SetFilters(fs...)
return pl
}
// VisitCbfStmt implements parser.QueryVisitor interface.
//
// It returns the container backup factor as a uint32. If the literal cannot
// be parsed as a 32-bit unsigned integer, an error wrapping ErrInvalidNumber
// and naming the statement is recorded and nil is returned.
func (p *policyVisitor) VisitCbfStmt(ctx *parser.CbfStmtContext) interface{} {
	cbf, err := strconv.ParseUint(ctx.GetBackupFactor().GetText(), 10, 32)
	if err != nil {
		// Name the statement so the caller can tell which number was rejected.
		return p.reportError(fmt.Errorf("%w: CBF", ErrInvalidNumber))
	}
	return uint32(cbf)
}
// VisitRepStmt implements parser.QueryVisitor interface.
//
// It converts a REP statement into a *netmap.Replica, setting the selector
// only when the optional IN part is present. If the replica count cannot be
// parsed as a 32-bit unsigned integer, an error wrapping ErrInvalidNumber and
// naming the statement is recorded and nil is returned.
func (p *policyVisitor) VisitRepStmt(ctx *parser.RepStmtContext) interface{} {
	num, err := strconv.ParseUint(ctx.GetCount().GetText(), 10, 32)
	if err != nil {
		// Name the statement so the caller can tell which number was rejected.
		return p.reportError(fmt.Errorf("%w: REP", ErrInvalidNumber))
	}

	rs := new(netmap.Replica)
	rs.SetCount(uint32(num))

	if sel := ctx.GetSelector(); sel != nil {
		rs.SetSelector(sel.GetText())
	}

	return rs
}
// VisitSelectStmt implements parser.QueryVisitor interface.
//
// It converts a SELECT statement into a *netmap.Selector. Clause, bucket
// attribute and name are set only when the corresponding optional part is
// present; the filter is always set. If the node count cannot be parsed as a
// 32-bit unsigned integer, an error wrapping ErrInvalidNumber and naming the
// statement is recorded and nil is returned.
func (p *policyVisitor) VisitSelectStmt(ctx *parser.SelectStmtContext) interface{} {
	res, err := strconv.ParseUint(ctx.GetCount().GetText(), 10, 32)
	if err != nil {
		// Name the statement so the caller can tell which number was rejected.
		return p.reportError(fmt.Errorf("%w: SELECT", ErrInvalidNumber))
	}

	s := new(netmap.Selector)
	s.SetCount(uint32(res))

	if clStmt := ctx.Clause(); clStmt != nil {
		s.SetClause(clauseFromString(clStmt.GetText()))
	}

	if bStmt := ctx.GetBucket(); bStmt != nil {
		s.SetAttribute(bStmt.GetText())
	}

	s.SetFilter(ctx.GetFilter().GetText()) // either ident or wildcard

	// Check the labelled name itself rather than the AS token, so the value
	// that is dereferenced is the one that was tested.
	if name := ctx.GetName(); name != nil {
		s.SetName(name.GetText())
	}

	return s
}
// VisitFilterStmt implements parser.QueryVisitor interface.
//
// It builds the filter described by the statement's expression and gives it
// the name that follows AS.
func (p *policyVisitor) VisitFilterStmt(ctx *parser.FilterStmtContext) interface{} {
	exprCtx := ctx.GetExpr().(*parser.FilterExprContext)
	flt := p.VisitFilterExpr(exprCtx).(*netmap.Filter)
	flt.SetName(ctx.GetName().GetText())
	return flt
}
// VisitFilterExpr converts a filter expression into a *netmap.Filter.
//
// A plain expr is delegated to its own visit method. For a binary AND/OR node
// both operands are visited and combined under a new filter carrying the
// node's operation.
func (p *policyVisitor) VisitFilterExpr(ctx *parser.FilterExprContext) interface{} {
	if leaf := ctx.Expr(); leaf != nil {
		return leaf.Accept(p)
	}

	res := new(netmap.Filter)
	op := operationFromString(ctx.GetOp().GetText())
	res.SetOperation(op)

	left := ctx.GetF1().Accept(p).(*netmap.Filter)
	right := ctx.GetF2().Accept(p).(*netmap.Filter)

	// Consider left=(.. AND ..) AND right. The operands can be flattened into a
	// single filter because AND/OR are of arbitrary arity here. ANTLR generates
	// a left-associative parse tree by default, so only the left operand needs
	// to be checked.
	var inner []*netmap.Filter
	if left.Operation() == op {
		inner = append(left.InnerFilters(), right)
	} else {
		inner = []*netmap.Filter{left, right}
	}
	res.SetInnerFilters(inner...)

	return res
}
// VisitFilterKey implements parser.QueryVisitor interface.
//
// It returns the key as a string: the identifier text as is, or the string
// literal with its first and last characters (the quotes) removed.
func (p *policyVisitor) VisitFilterKey(ctx *parser.FilterKeyContext) interface{} {
	id := ctx.Ident()
	if id == nil {
		quoted := ctx.STRING().GetText()
		return quoted[1 : len(quoted)-1]
	}
	return id.GetText()
}
// VisitFilterValue returns the filter value as a string: the identifier or
// number text as is, or the string literal with its first and last characters
// (the quotes) removed.
func (p *policyVisitor) VisitFilterValue(ctx *parser.FilterValueContext) interface{} {
	id := ctx.Ident()
	if id != nil {
		return id.GetText()
	}

	num := ctx.Number()
	if num != nil {
		return num.GetText()
	}

	quoted := ctx.STRING().GetText()
	last := len(quoted) - 1
	return quoted[1:last]
}
// VisitExpr implements parser.QueryVisitor interface.
//
// For a reference to a named filter (`@name`) it returns a filter carrying
// only that name. Otherwise it returns a filter with the key, operation and
// value of the attribute comparison.
func (p *policyVisitor) VisitExpr(ctx *parser.ExprContext) interface{} {
	res := new(netmap.Filter)

	ref := ctx.GetFilter()
	if ref != nil {
		res.SetName(ref.GetText())
		return res
	}

	keyRes := ctx.GetKey().Accept(p)
	opText := ctx.SIMPLE_OP().GetText()
	valRes := ctx.GetValue().Accept(p)

	res.SetKey(keyRes.(string))
	res.SetOperation(operationFromString(opText))
	res.SetValue(valRes.(string))

	return res
}
// validatePolicy checks high-level constraints such as filter link in SELECT
// being actually defined in FILTER section.
func validatePolicy(p *netmap.PlacementPolicy) error {
seenFilters := map[string]bool{}
for _, f := range p.Filters() {
seenFilters[f.Name()] = true
}
seenSelectors := map[string]bool{}
ss := make([]*netmap.Selector, 0, len(q.Selectors))
for _, qs := range q.Selectors {
if qs.Filter != netmap.MainFilterName && !seenFilters[qs.Filter] {
return nil, fmt.Errorf("%w: '%s'", ErrUnknownFilter, qs.Filter)
for _, s := range p.Selectors() {
if flt := s.Filter(); flt != netmap.MainFilterName && !seenFilters[flt] {
return fmt.Errorf("%w: '%s'", ErrUnknownFilter, flt)
}
s := netmap.NewSelector()
switch len(qs.Bucket) {
case 1: // only bucket
s.SetAttribute(qs.Bucket[0])
case 2: // clause + bucket
s.SetClause(clauseFromString(qs.Bucket[0]))
s.SetAttribute(qs.Bucket[1])
}
s.SetName(qs.Name)
seenSelectors[qs.Name] = true
s.SetFilter(qs.Filter)
if qs.Count == 0 {
return nil, fmt.Errorf("%w: SELECT", ErrInvalidNumber)
}
s.SetCount(qs.Count)
ss = append(ss, s)
seenSelectors[s.Name()] = true
}
rs := make([]*netmap.Replica, 0, len(q.Replicas))
for _, qr := range q.Replicas {
r := netmap.NewReplica()
if qr.Selector != "" {
if !seenSelectors[qr.Selector] {
return nil, fmt.Errorf("%w: '%s'", ErrUnknownSelector, qr.Selector)
}
r.SetSelector(qr.Selector)
for _, r := range p.Replicas() {
if sel := r.Selector(); sel != "" && !seenSelectors[sel] {
return fmt.Errorf("%w: '%s'", ErrUnknownSelector, sel)
}
if qr.Count == 0 {
return nil, fmt.Errorf("%w: REP", ErrInvalidNumber)
}
r.SetCount(uint32(qr.Count))
rs = append(rs, r)
}
p := new(netmap.PlacementPolicy)
p.SetFilters(fs...)
p.SetSelectors(ss...)
p.SetReplicas(rs...)
p.SetContainerBackupFactor(q.CBF)
return p, nil
return nil
}
func clauseFromString(s string) netmap.Clause {
@ -105,74 +277,31 @@ func clauseFromString(s string) netmap.Clause {
case "DISTINCT":
return netmap.ClauseDistinct
default:
return 0
// Such errors should be handled by ANTLR code thus this panic.
panic(fmt.Errorf("BUG: invalid clause: %s", s))
}
}
// filterFromOrChain converts an OR chain into a filter. A chain with exactly
// one AND clause is returned as that clause's filter; otherwise the clause
// filters are wrapped into a new filter with the OR operation.
func filterFromOrChain(expr *orChain, seen map[string]bool) (*netmap.Filter, error) {
	var alternatives []*netmap.Filter

	for i := range expr.Clauses {
		sub, err := filterFromAndChain(expr.Clauses[i], seen)
		if err != nil {
			return nil, err
		}
		alternatives = append(alternatives, sub)
	}

	if len(alternatives) != 1 {
		or := netmap.NewFilter()
		or.SetOperation(netmap.OpOR)
		or.SetInnerFilters(alternatives...)
		return or, nil
	}

	return alternatives[0], nil
}
// filterFromAndChain converts an AND chain into a filter. Each operand is
// either a simple expression or a reference to a named filter. A chain with
// exactly one operand is returned as that operand's filter; otherwise the
// operand filters are wrapped into a new filter with the AND operation.
func filterFromAndChain(expr *andChain, seen map[string]bool) (*netmap.Filter, error) {
	var operands []*netmap.Filter

	for _, clause := range expr.Clauses {
		if clause.Expr == nil {
			// Reference to a named filter: only the name is set.
			ref := netmap.NewFilter()
			ref.SetName(clause.Reference)
			operands = append(operands, ref)
			continue
		}

		simple, err := filterFromSimpleExpr(clause.Expr, seen)
		if err != nil {
			return nil, err
		}
		operands = append(operands, simple)
	}

	if len(operands) != 1 {
		and := netmap.NewFilter()
		and.SetOperation(netmap.OpAND)
		and.SetInnerFilters(operands...)
		return and, nil
	}

	return operands[0], nil
}
func filterFromSimpleExpr(se *simpleExpr, seen map[string]bool) (*netmap.Filter, error) {
f := netmap.NewFilter()
f.SetKey(se.Key)
switch se.Op {
func operationFromString(op string) netmap.Operation {
switch strings.ToUpper(op) {
case "AND":
return netmap.OpAND
case "OR":
return netmap.OpOR
case "EQ":
f.SetOperation(netmap.OpEQ)
return netmap.OpEQ
case "NE":
f.SetOperation(netmap.OpNE)
return netmap.OpNE
case "GE":
f.SetOperation(netmap.OpGE)
return netmap.OpGE
case "GT":
f.SetOperation(netmap.OpGT)
return netmap.OpGT
case "LE":
f.SetOperation(netmap.OpLE)
return netmap.OpLE
case "LT":
f.SetOperation(netmap.OpLT)
return netmap.OpLT
default:
return nil, fmt.Errorf("%w: '%s'", ErrUnknownOp, se.Op)
// Such errors should be handled by ANTLR code thus this panic.
panic(fmt.Errorf("BUG: invalid operation: %s", op))
}
f.SetValue(se.Value)
return f, nil
}

View file

@ -2,6 +2,7 @@ package policy
import (
"errors"
"fmt"
"testing"
"github.com/nspcc-dev/neofs-api-go/v2/netmap"
@ -80,6 +81,32 @@ func TestFromSelectNoAttribute(t *testing.T) {
})
}
// TestString checks that single- and double-quoted string literals are
// accepted as filter values and that the surrounding quotes are stripped from
// the resulting filter value.
func TestString(t *testing.T) {
	qTemplate := `REP 1
SELECT 1 IN City FROM Filt
FILTER Property EQ %s AND Something NE 7 AS Filt`

	literals := []string{
		`"double-quoted"`,
		`"with ' single"`,
		`'single-quoted'`,
		`'with " double'`,
	}

	for _, literal := range literals {
		t.Run(literal, func(t *testing.T) {
			policy, err := Parse(fmt.Sprintf(qTemplate, literal))
			require.NoError(t, err)

			unquoted := literal[1 : len(literal)-1]
			want := newFilter("Filt", "", "", netmap.AND,
				newFilter("", "Property", unquoted, netmap.EQ),
				newFilter("", "Something", "7", netmap.NE))
			require.EqualValues(t, []*netmap.Filter{want}, policy.Filters())
		})
	}
}
func TestFromSelectClause(t *testing.T) {
q := `REP 4
SELECT 3 IN Country FROM *
@ -205,12 +232,12 @@ func TestValidation(t *testing.T) {
SELECT 1 IN City FROM F
FILTER Country KEK RU AS F`
_, err := Parse(q)
require.True(t, errors.Is(err, ErrUnknownOp), "got: %v", err)
require.True(t, errors.Is(err, ErrSyntaxError), "got: %v", err)
})
t.Run("TypoInREP", func(t *testing.T) {
q := `REK 3`
_, err := Parse(q)
require.Error(t, err)
require.True(t, errors.Is(err, ErrSyntaxError))
})
t.Run("InvalidFilterName", func(t *testing.T) {
q := `REP 3
@ -223,13 +250,13 @@ func TestValidation(t *testing.T) {
t.Run("InvalidNumberInREP", func(t *testing.T) {
q := `REP 0`
_, err := Parse(q)
require.True(t, errors.Is(err, ErrInvalidNumber), "got: %v", err)
require.True(t, errors.Is(err, ErrSyntaxError), "got: %v", err)
})
t.Run("InvalidNumberInREP", func(t *testing.T) {
q := `REP 1 IN Good
SELECT 0 IN City FROM *`
_, err := Parse(q)
require.True(t, errors.Is(err, ErrInvalidNumber), "got: %v", err)
require.True(t, errors.Is(err, ErrSyntaxError), "got: %v", err)
})
}