Add cmd/intrinsic-gen lexer and tokens
Ground work for the new intrinsic definition parser. Bug: tint:832 Change-Id: I341ae11e36ef7af96ce7d01609a96e2c02425e87 Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/52500 Kokoro: Kokoro <noreply+kokoro@google.com> Reviewed-by: David Neto <dneto@google.com>
This commit is contained in:
parent
d78f55390d
commit
c95e05784d
|
@ -0,0 +1,200 @@
|
||||||
|
// Copyright 2021 The Tint Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package lexer provides a basic lexer for the Tint intrinsic definition
|
||||||
|
// language
|
||||||
|
package lexer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"unicode"
|
||||||
|
|
||||||
|
"dawn.googlesource.com/tint/tools/src/cmd/intrinsic-gen/tok"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Lex produces a list of tokens for the given source code
|
||||||
|
func Lex(src []rune, filepath string) ([]tok.Token, error) {
|
||||||
|
l := lexer{
|
||||||
|
tok.Location{Line: 1, Column: 1, Rune: 0, Filepath: filepath},
|
||||||
|
src,
|
||||||
|
[]tok.Token{},
|
||||||
|
}
|
||||||
|
if err := l.lex(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return l.tokens, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// lexer holds the state used while scanning the source into tokens.
type lexer struct {
	loc    tok.Location // location of the next rune to be consumed
	runes  []rune       // the source runes not yet consumed
	tokens []tok.Token  // tokens produced so far
}
|
||||||
|
|
||||||
|
// lex() lexes the source, populating l.tokens
|
||||||
|
func (l *lexer) lex() error {
|
||||||
|
for {
|
||||||
|
switch l.peek(0) {
|
||||||
|
case 0:
|
||||||
|
return nil
|
||||||
|
case ' ', '\t':
|
||||||
|
l.next()
|
||||||
|
case '\n':
|
||||||
|
l.next()
|
||||||
|
case '<':
|
||||||
|
l.tok(1, tok.Lt)
|
||||||
|
case '>':
|
||||||
|
l.tok(1, tok.Gt)
|
||||||
|
case '(':
|
||||||
|
l.tok(1, tok.Lparen)
|
||||||
|
case ')':
|
||||||
|
l.tok(1, tok.Rparen)
|
||||||
|
case '{':
|
||||||
|
l.tok(1, tok.Lbrace)
|
||||||
|
case '}':
|
||||||
|
l.tok(1, tok.Rbrace)
|
||||||
|
case ':':
|
||||||
|
l.tok(1, tok.Colon)
|
||||||
|
case ',':
|
||||||
|
l.tok(1, tok.Comma)
|
||||||
|
case '|':
|
||||||
|
l.tok(1, tok.Or)
|
||||||
|
case '"':
|
||||||
|
start := l.loc
|
||||||
|
l.next() // Skip opening quote
|
||||||
|
n := l.count(toFirst('\n', '"'))
|
||||||
|
if l.peek(n) != '"' {
|
||||||
|
return fmt.Errorf("%v unterminated string", start)
|
||||||
|
}
|
||||||
|
l.tok(n, tok.String)
|
||||||
|
l.next() // Skip closing quote
|
||||||
|
default:
|
||||||
|
switch {
|
||||||
|
case l.peek(1) == '/':
|
||||||
|
l.skip(l.count(toFirst('\n')))
|
||||||
|
l.next() // Consume newline
|
||||||
|
case l.match("[[", tok.Ldeco):
|
||||||
|
case l.match("]]", tok.Rdeco):
|
||||||
|
case l.match("->", tok.Arrow):
|
||||||
|
case l.match("fn", tok.Function):
|
||||||
|
case l.match("enum", tok.Enum):
|
||||||
|
case l.match("type", tok.Type):
|
||||||
|
case l.match("match", tok.Match):
|
||||||
|
case unicode.IsLetter(l.peek(0)):
|
||||||
|
l.tok(l.count(alphaNumericOrUnderscore), tok.Identifier)
|
||||||
|
case unicode.IsNumber(l.peek(0)):
|
||||||
|
l.tok(l.count(unicode.IsNumber), tok.Integer)
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("%v: unexpected '%v'", l.loc, string(l.runes[0]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// next() consumes and returns the next rune in the source, or 0 if reached EOF
|
||||||
|
func (l *lexer) next() rune {
|
||||||
|
if len(l.runes) > 0 {
|
||||||
|
r := l.runes[0]
|
||||||
|
l.runes = l.runes[1:]
|
||||||
|
l.loc.Rune++
|
||||||
|
if r == '\n' {
|
||||||
|
l.loc.Line++
|
||||||
|
l.loc.Column = 1
|
||||||
|
} else {
|
||||||
|
l.loc.Column++
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// skip() consumes the next `n` runes in the source
|
||||||
|
func (l *lexer) skip(n int) {
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
l.next()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// peek() returns the rune `i` runes ahead of the current position
|
||||||
|
func (l *lexer) peek(i int) rune {
|
||||||
|
if i >= len(l.runes) {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return l.runes[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
// predicate is a function that can be passed to count().
// It reports whether the rune `r` satisfies some condition.
type predicate func(r rune) bool
|
||||||
|
|
||||||
|
// count() returns the number of sequential runes from the current position that
|
||||||
|
// match the predicate `p`
|
||||||
|
func (l *lexer) count(p predicate) int {
|
||||||
|
for i := 0; i < len(l.runes); i++ {
|
||||||
|
if !p(l.peek(i)) {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return len(l.runes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// tok() appends a new token of kind `k` using the next `n` runes.
|
||||||
|
// The next `n` runes are consumed by tok().
|
||||||
|
func (l *lexer) tok(n int, k tok.Kind) {
|
||||||
|
start := l.loc
|
||||||
|
runes := l.runes[:n]
|
||||||
|
l.skip(n)
|
||||||
|
end := l.loc
|
||||||
|
|
||||||
|
src := tok.Source{S: start, E: end}
|
||||||
|
l.tokens = append(l.tokens, tok.Token{Kind: k, Source: src, Runes: runes})
|
||||||
|
}
|
||||||
|
|
||||||
|
// match() checks whether the next runes are equal to `s`. If they are, then
|
||||||
|
// these runes are used to append a new token of kind `k`, and match() returns
|
||||||
|
// true. If the next runes are not equal to `s` then false is returned, and no
|
||||||
|
// runes are consumed.
|
||||||
|
func (l *lexer) match(s string, kind tok.Kind) bool {
|
||||||
|
runes := []rune(s)
|
||||||
|
if len(l.runes) < len(runes) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i, r := range runes {
|
||||||
|
if l.runes[i] != r {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
l.tok(len(runes), kind)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// toFirst() returns a predicate that returns true if the rune is not in `runes`
|
||||||
|
// toFirst() is intended to be used with count(), so `count(toFirst('x'))` will
|
||||||
|
// count up to, but not including the number of consecutive runes that are not
|
||||||
|
// 'x'.
|
||||||
|
func toFirst(runes ...rune) predicate {
|
||||||
|
return func(r rune) bool {
|
||||||
|
for _, t := range runes {
|
||||||
|
if t == r {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// alphaNumericOrUnderscore() returns true if the rune `r` is a number, letter
// or underscore.
func alphaNumericOrUnderscore(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r), unicode.IsNumber(r):
		return true
	default:
		return false
	}
}
|
|
@ -0,0 +1,144 @@
|
||||||
|
// Copyright 2021 The Tint Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package lexer_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"dawn.googlesource.com/tint/tools/src/cmd/intrinsic-gen/lexer"
|
||||||
|
"dawn.googlesource.com/tint/tools/src/cmd/intrinsic-gen/tok"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestLexTokens checks that each source snippet lexes to exactly one token
// with the expected kind, runes and source range.
func TestLexTokens(t *testing.T) {
	type test struct {
		src    string    // source fed to the lexer
		expect tok.Token // the single token expected back
	}

	filepath := "test.txt"
	// loc builds an expected Location for `filepath`.
	loc := func(l, c, r int) tok.Location {
		return tok.Location{Line: l, Column: c, Rune: r, Filepath: filepath}
	}

	for _, test := range []test{
		{"ident", tok.Token{Kind: tok.Identifier, Runes: []rune("ident"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 6, 5),
		}}},
		{"ident_123", tok.Token{Kind: tok.Identifier, Runes: []rune("ident_123"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 10, 9),
		}}},
		{"123456789", tok.Token{Kind: tok.Integer, Runes: []rune("123456789"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 10, 9),
		}}},
		{"match", tok.Token{Kind: tok.Match, Runes: []rune("match"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 6, 5),
		}}},
		{"fn", tok.Token{Kind: tok.Function, Runes: []rune("fn"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 3, 2),
		}}},
		{"type", tok.Token{Kind: tok.Type, Runes: []rune("type"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 5, 4),
		}}},
		{"enum", tok.Token{Kind: tok.Enum, Runes: []rune("enum"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 5, 4),
		}}},
		{":", tok.Token{Kind: tok.Colon, Runes: []rune(":"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{",", tok.Token{Kind: tok.Comma, Runes: []rune(","), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{"<", tok.Token{Kind: tok.Lt, Runes: []rune("<"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{">", tok.Token{Kind: tok.Gt, Runes: []rune(">"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{"{", tok.Token{Kind: tok.Lbrace, Runes: []rune("{"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{"}", tok.Token{Kind: tok.Rbrace, Runes: []rune("}"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{"[[", tok.Token{Kind: tok.Ldeco, Runes: []rune("[["), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 3, 2),
		}}},
		{"]]", tok.Token{Kind: tok.Rdeco, Runes: []rune("]]"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 3, 2),
		}}},
		{"(", tok.Token{Kind: tok.Lparen, Runes: []rune("("), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{")", tok.Token{Kind: tok.Rparen, Runes: []rune(")"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{"|", tok.Token{Kind: tok.Or, Runes: []rune("|"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{"->", tok.Token{Kind: tok.Arrow, Runes: []rune("->"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 3, 2),
		}}},
		// A trailing line comment is consumed without producing a token.
		{"x // y ", tok.Token{Kind: tok.Identifier, Runes: []rune("x"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		// String tokens exclude the surrounding quotes.
		{`"abc"`, tok.Token{Kind: tok.String, Runes: []rune("abc"), Source: tok.Source{
			S: loc(1, 2, 1), E: loc(1, 5, 4),
		}}},
		// Multi-line input: leading newlines, whitespace and comments are
		// skipped; the identifier's location reflects line 3, column 4.
		// NOTE(review): the indentation inside this raw string was lost in
		// transcription and is reconstructed (3 spaces) from the expected
		// loc(3, 4, 10) — confirm against the original source.
		{`
   //
   ident

`, tok.Token{Kind: tok.Identifier, Runes: []rune("ident"), Source: tok.Source{
			S: loc(3, 4, 10), E: loc(3, 9, 15),
		}}},
	} {
		got, err := lexer.Lex([]rune(test.src), filepath)
		name := fmt.Sprintf(`Lex("%v")`, test.src)
		switch {
		case err != nil:
			t.Errorf("%v returned error: %v", name, err)
		case len(got) != 1:
			t.Errorf("%v returned %d tokens: %v", name, len(got), got)
		case got[0].Kind != test.expect.Kind:
			t.Errorf(`%v returned unexpected token kind: got "%+v", expected "%+v"`, name, got[0], test.expect)
		case string(got[0].Runes) != string(test.expect.Runes):
			t.Errorf(`%v returned unexpected token runes: got "%+v", expected "%+v"`, name, string(got[0].Runes), string(test.expect.Runes))
		case got[0].Source != test.expect.Source:
			t.Errorf(`%v returned unexpected token source: got %+v, expected %+v`, name, got[0].Source, test.expect.Source)
		}
	}
}
|
||||||
|
|
||||||
|
func TestErrors(t *testing.T) {
|
||||||
|
type test struct {
|
||||||
|
src string
|
||||||
|
expect string
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range []test{
|
||||||
|
{" \"abc", "test.txt:1:2 unterminated string"},
|
||||||
|
{" \"abc\n", "test.txt:1:2 unterminated string"},
|
||||||
|
{"*", "test.txt:1:1: unexpected '*'"},
|
||||||
|
} {
|
||||||
|
got, err := lexer.Lex([]rune(test.src), "test.txt")
|
||||||
|
if gotErr := err.Error(); test.expect != gotErr {
|
||||||
|
t.Errorf(`Lex() returned error "%+v", expected error "%+v"`, gotErr, test.expect)
|
||||||
|
}
|
||||||
|
if got != nil {
|
||||||
|
t.Errorf("Lex() returned non-nil for error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,119 @@
|
||||||
|
// Copyright 2021 The Tint Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package tok defines tokens that are produced by the Tint intrinsic definition
|
||||||
|
// lexer
|
||||||
|
package tok
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// Kind is an enumerator of token kinds.
// The string value is the human-readable name of the kind.
type Kind string
|
||||||
|
|
||||||
|
// Token enumerator types
|
||||||
|
const (
|
||||||
|
InvalidToken Kind = "<invalid>"
|
||||||
|
Identifier Kind = "ident"
|
||||||
|
Integer Kind = "integer"
|
||||||
|
String Kind = "string"
|
||||||
|
Match Kind = "match"
|
||||||
|
Function Kind = "fn"
|
||||||
|
Type Kind = "type"
|
||||||
|
Enum Kind = "enum"
|
||||||
|
Colon Kind = ":"
|
||||||
|
Comma Kind = ","
|
||||||
|
Lt Kind = "<"
|
||||||
|
Gt Kind = ">"
|
||||||
|
Lbrace Kind = "{"
|
||||||
|
Rbrace Kind = "}"
|
||||||
|
Ldeco Kind = "[["
|
||||||
|
Rdeco Kind = "]]"
|
||||||
|
Lparen Kind = "("
|
||||||
|
Rparen Kind = ")"
|
||||||
|
Or Kind = "|"
|
||||||
|
Arrow Kind = "->"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Invalid represents an invalid token (zero Runes, zero Source).
var Invalid = Token{Kind: InvalidToken}
|
||||||
|
|
||||||
|
// Location describes a rune location in the source code
|
||||||
|
type Location struct {
|
||||||
|
// 1-based line index
|
||||||
|
Line int
|
||||||
|
// 1-based column index
|
||||||
|
Column int
|
||||||
|
// 0-based rune index
|
||||||
|
Rune int
|
||||||
|
// Optional file path
|
||||||
|
Filepath string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format implements the fmt.Formatter interface
|
||||||
|
func (l Location) Format(w fmt.State, verb rune) {
|
||||||
|
if w.Flag('+') {
|
||||||
|
if l.Filepath != "" {
|
||||||
|
fmt.Fprintf(w, "%v:%v:%v[%v]", l.Filepath, l.Line, l.Column, l.Rune)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(w, "%v:%v[%v]", l.Line, l.Column, l.Rune)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if l.Filepath != "" {
|
||||||
|
fmt.Fprintf(w, "%v:%v:%v", l.Filepath, l.Line, l.Column)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(w, "%v:%v", l.Line, l.Column)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Source describes a start and end range in the source code
type Source struct {
	// S is the start location (inclusive), E the end location (exclusive).
	S, E Location
}
|
||||||
|
|
||||||
|
// IsValid returns true if the source is valid
|
||||||
|
func (s Source) IsValid() bool {
|
||||||
|
return s.S.Line != 0 && s.S.Column != 0 && s.E.Line != 0 && s.E.Column != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format implements the fmt.Formatter interface
|
||||||
|
func (s Source) Format(w fmt.State, verb rune) {
|
||||||
|
if w.Flag('+') {
|
||||||
|
fmt.Fprint(w, "[")
|
||||||
|
s.S.Format(w, verb)
|
||||||
|
fmt.Fprint(w, " - ")
|
||||||
|
s.E.Format(w, verb)
|
||||||
|
fmt.Fprint(w, "]")
|
||||||
|
} else {
|
||||||
|
s.S.Format(w, verb)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Token describes a parsed token
type Token struct {
	Kind   Kind   // the kind of the token
	Runes  []rune // the token's content (quotes excluded for strings)
	Source Source // where the token was found in the source
}
|
||||||
|
|
||||||
|
// Format implements the fmt.Formatter interface
|
||||||
|
func (t Token) Format(w fmt.State, verb rune) {
|
||||||
|
fmt.Fprint(w, "[")
|
||||||
|
t.Source.Format(w, verb)
|
||||||
|
fmt.Fprint(w, " ")
|
||||||
|
fmt.Fprint(w, t.Kind)
|
||||||
|
fmt.Fprint(w, " ")
|
||||||
|
fmt.Fprint(w, string(t.Runes))
|
||||||
|
fmt.Fprint(w, "]")
|
||||||
|
}
|
Loading…
Reference in New Issue