Add cmd/intrinsic-gen lexer and tokens

Groundwork for the new intrinsic definition parser.
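
For context, a minimal sketch of how the lexer is intended to be
driven (the input snippet and file name below are illustrative only,
not part of this change):

    src := []rune("fn f(i32) -> i32")
    tokens, err := lexer.Lex(src, "example.def")
    // err == nil; tokens: fn, f, (, i32, ), ->, i32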

Bug: tint:832
Change-Id: I341ae11e36ef7af96ce7d01609a96e2c02425e87
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/52500
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: David Neto <dneto@google.com>
Ben Clayton 2021-05-31 09:48:48 +00:00
parent d78f55390d
commit c95e05784d
3 changed files with 463 additions and 0 deletions


@@ -0,0 +1,200 @@
// Copyright 2021 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package lexer provides a basic lexer for the Tint intrinsic definition
// language.
package lexer

import (
	"fmt"
	"unicode"

	"dawn.googlesource.com/tint/tools/src/cmd/intrinsic-gen/tok"
)

// Lex produces a list of tokens for the given source code.
func Lex(src []rune, filepath string) ([]tok.Token, error) {
	l := lexer{
		loc:    tok.Location{Line: 1, Column: 1, Rune: 0, Filepath: filepath},
		runes:  src,
		tokens: []tok.Token{},
	}
	if err := l.lex(); err != nil {
		return nil, err
	}
	return l.tokens, nil
}

type lexer struct {
	loc    tok.Location
	runes  []rune
	tokens []tok.Token
}

// lex() lexes the source, populating l.tokens.
func (l *lexer) lex() error {
	for {
		switch l.peek(0) {
		case 0:
			return nil
		case ' ', '\t':
			l.next()
		case '\n':
			l.next()
		case '<':
			l.tok(1, tok.Lt)
		case '>':
			l.tok(1, tok.Gt)
		case '(':
			l.tok(1, tok.Lparen)
		case ')':
			l.tok(1, tok.Rparen)
		case '{':
			l.tok(1, tok.Lbrace)
		case '}':
			l.tok(1, tok.Rbrace)
		case ':':
			l.tok(1, tok.Colon)
		case ',':
			l.tok(1, tok.Comma)
		case '|':
			l.tok(1, tok.Or)
		case '"':
			start := l.loc
			l.next() // Skip opening quote
			n := l.count(toFirst('\n', '"'))
			if l.peek(n) != '"' {
				return fmt.Errorf("%v unterminated string", start)
			}
			l.tok(n, tok.String)
			l.next() // Skip closing quote
		default:
			switch {
			case l.peek(0) == '/' && l.peek(1) == '/': // Line comment
				l.skip(l.count(toFirst('\n')))
				l.next() // Consume newline
			case l.match("[[", tok.Ldeco):
			case l.match("]]", tok.Rdeco):
			case l.match("->", tok.Arrow):
			case l.match("fn", tok.Function):
			case l.match("enum", tok.Enum):
			case l.match("type", tok.Type):
			case l.match("match", tok.Match):
			case unicode.IsLetter(l.peek(0)):
				l.tok(l.count(alphaNumericOrUnderscore), tok.Identifier)
			case unicode.IsNumber(l.peek(0)):
				l.tok(l.count(unicode.IsNumber), tok.Integer)
			default:
				return fmt.Errorf("%v: unexpected '%v'", l.loc, string(l.runes[0]))
			}
		}
	}
}

// next() consumes and returns the next rune in the source, or 0 if reached EOF.
func (l *lexer) next() rune {
	if len(l.runes) > 0 {
		r := l.runes[0]
		l.runes = l.runes[1:]
		l.loc.Rune++
		if r == '\n' {
			l.loc.Line++
			l.loc.Column = 1
		} else {
			l.loc.Column++
		}
		return r
	}
	return 0
}

// skip() consumes the next `n` runes in the source.
func (l *lexer) skip(n int) {
	for i := 0; i < n; i++ {
		l.next()
	}
}

// peek() returns the rune `i` runes ahead of the current position, or 0 if
// there are fewer than `i`+1 runes remaining.
func (l *lexer) peek(i int) rune {
	if i >= len(l.runes) {
		return 0
	}
	return l.runes[i]
}

// predicate is a function that can be passed to count().
type predicate func(r rune) bool

// count() returns the number of sequential runes from the current position
// that match the predicate `p`.
func (l *lexer) count(p predicate) int {
	for i := 0; i < len(l.runes); i++ {
		if !p(l.peek(i)) {
			return i
		}
	}
	return len(l.runes)
}

// tok() appends a new token of kind `k` using the next `n` runes.
// The next `n` runes are consumed by tok().
func (l *lexer) tok(n int, k tok.Kind) {
	start := l.loc
	runes := l.runes[:n]
	l.skip(n)
	end := l.loc
	src := tok.Source{S: start, E: end}
	l.tokens = append(l.tokens, tok.Token{Kind: k, Source: src, Runes: runes})
}

// match() checks whether the next runes are equal to `s`. If they are, then
// these runes are used to append a new token of kind `kind`, and match()
// returns true. If the next runes are not equal to `s` then false is
// returned, and no runes are consumed.
func (l *lexer) match(s string, kind tok.Kind) bool {
	runes := []rune(s)
	if len(l.runes) < len(runes) {
		return false
	}
	for i, r := range runes {
		if l.runes[i] != r {
			return false
		}
	}
	l.tok(len(runes), kind)
	return true
}

// toFirst() returns a predicate that returns true if the rune is not in
// `runes`. toFirst() is intended to be used with count(), so
// `count(toFirst('x'))` returns the number of consecutive runes up to, but
// not including, the first 'x'.
func toFirst(runes ...rune) predicate {
	return func(r rune) bool {
		for _, t := range runes {
			if t == r {
				return false
			}
		}
		return true
	}
}

// alphaNumericOrUnderscore() returns true if the rune `r` is a number, letter
// or underscore.
func alphaNumericOrUnderscore(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsNumber(r)
}
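
Taken together, the pieces above turn definition source into a flat token
list. A small self-contained sketch of driving the package (the input and
file name are hypothetical, not part of this change):

    package main

    import (
        "fmt"

        "dawn.googlesource.com/tint/tools/src/cmd/intrinsic-gen/lexer"
    )

    func main() {
        // "fn" lexes as the Function keyword, "f" as an Identifier, and
        // the punctuation as its matching token kinds.
        tokens, err := lexer.Lex([]rune("fn f() -> i32"), "example.def")
        if err != nil {
            panic(err)
        }
        for _, t := range tokens {
            fmt.Printf("%v\n", t) // e.g. [example.def:1:1 fn fn]
        }
    }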


@@ -0,0 +1,144 @@
// Copyright 2021 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package lexer_test

import (
	"fmt"
	"testing"

	"dawn.googlesource.com/tint/tools/src/cmd/intrinsic-gen/lexer"
	"dawn.googlesource.com/tint/tools/src/cmd/intrinsic-gen/tok"
)

func TestLexTokens(t *testing.T) {
	type test struct {
		src    string
		expect tok.Token
	}

	filepath := "test.txt"
	loc := func(l, c, r int) tok.Location {
		return tok.Location{Line: l, Column: c, Rune: r, Filepath: filepath}
	}

	for _, test := range []test{
{"ident", tok.Token{Kind: tok.Identifier, Runes: []rune("ident"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 6, 5),
}}},
{"ident_123", tok.Token{Kind: tok.Identifier, Runes: []rune("ident_123"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 10, 9),
}}},
{"123456789", tok.Token{Kind: tok.Integer, Runes: []rune("123456789"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 10, 9),
}}},
{"match", tok.Token{Kind: tok.Match, Runes: []rune("match"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 6, 5),
}}},
{"fn", tok.Token{Kind: tok.Function, Runes: []rune("fn"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 3, 2),
}}},
{"type", tok.Token{Kind: tok.Type, Runes: []rune("type"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 5, 4),
}}},
{"enum", tok.Token{Kind: tok.Enum, Runes: []rune("enum"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 5, 4),
}}},
{":", tok.Token{Kind: tok.Colon, Runes: []rune(":"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 2, 1),
}}},
{",", tok.Token{Kind: tok.Comma, Runes: []rune(","), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 2, 1),
}}},
{"<", tok.Token{Kind: tok.Lt, Runes: []rune("<"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 2, 1),
}}},
{">", tok.Token{Kind: tok.Gt, Runes: []rune(">"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 2, 1),
}}},
{"{", tok.Token{Kind: tok.Lbrace, Runes: []rune("{"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 2, 1),
}}},
{"}", tok.Token{Kind: tok.Rbrace, Runes: []rune("}"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 2, 1),
}}},
{"[[", tok.Token{Kind: tok.Ldeco, Runes: []rune("[["), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 3, 2),
}}},
{"]]", tok.Token{Kind: tok.Rdeco, Runes: []rune("]]"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 3, 2),
}}},
{"(", tok.Token{Kind: tok.Lparen, Runes: []rune("("), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 2, 1),
}}},
{")", tok.Token{Kind: tok.Rparen, Runes: []rune(")"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 2, 1),
}}},
{"|", tok.Token{Kind: tok.Or, Runes: []rune("|"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 2, 1),
}}},
{"->", tok.Token{Kind: tok.Arrow, Runes: []rune("->"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 3, 2),
}}},
{"x // y ", tok.Token{Kind: tok.Identifier, Runes: []rune("x"), Source: tok.Source{
S: loc(1, 1, 0), E: loc(1, 2, 1),
}}},
{`"abc"`, tok.Token{Kind: tok.String, Runes: []rune("abc"), Source: tok.Source{
S: loc(1, 2, 1), E: loc(1, 5, 4),
}}},
		{`
   //
   ident
   `, tok.Token{Kind: tok.Identifier, Runes: []rune("ident"), Source: tok.Source{
			S: loc(3, 4, 10), E: loc(3, 9, 15),
		}}},
	} {
		got, err := lexer.Lex([]rune(test.src), filepath)
		name := fmt.Sprintf(`Lex("%v")`, test.src)
		switch {
		case err != nil:
			t.Errorf("%v returned error: %v", name, err)
		case len(got) != 1:
			t.Errorf("%v returned %d tokens: %v", name, len(got), got)
		case got[0].Kind != test.expect.Kind:
			t.Errorf(`%v returned unexpected token kind: got "%+v", expected "%+v"`, name, got[0], test.expect)
		case string(got[0].Runes) != string(test.expect.Runes):
			t.Errorf(`%v returned unexpected token runes: got "%+v", expected "%+v"`, name, string(got[0].Runes), string(test.expect.Runes))
		case got[0].Source != test.expect.Source:
			t.Errorf(`%v returned unexpected token source: got %+v, expected %+v`, name, got[0].Source, test.expect.Source)
		}
	}
}

func TestErrors(t *testing.T) {
	type test struct {
		src    string
		expect string
	}

	for _, test := range []test{
		{" \"abc", "test.txt:1:2 unterminated string"},
		{" \"abc\n", "test.txt:1:2 unterminated string"},
		{"*", "test.txt:1:1: unexpected '*'"},
	} {
		got, err := lexer.Lex([]rune(test.src), "test.txt")
		if gotErr := err.Error(); test.expect != gotErr {
			t.Errorf(`Lex() returned error "%+v", expected error "%+v"`, gotErr, test.expect)
		}
		if got != nil {
			t.Errorf("Lex() returned non-nil for error")
		}
	}
}


@@ -0,0 +1,119 @@
// Copyright 2021 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package tok defines tokens that are produced by the Tint intrinsic
// definition lexer.
package tok

import "fmt"

// Kind is an enumerator of token kinds.
type Kind string

// Token kind constants
const (
	InvalidToken Kind = "<invalid>"
	Identifier   Kind = "ident"
	Integer      Kind = "integer"
	String       Kind = "string"
	Match        Kind = "match"
	Function     Kind = "fn"
	Type         Kind = "type"
	Enum         Kind = "enum"
	Colon        Kind = ":"
	Comma        Kind = ","
	Lt           Kind = "<"
	Gt           Kind = ">"
	Lbrace       Kind = "{"
	Rbrace       Kind = "}"
	Ldeco        Kind = "[["
	Rdeco        Kind = "]]"
	Lparen       Kind = "("
	Rparen       Kind = ")"
	Or           Kind = "|"
	Arrow        Kind = "->"
)

// Invalid represents an invalid token.
var Invalid = Token{Kind: InvalidToken}

// Location describes a rune location in the source code.
type Location struct {
	// 1-based line index
	Line int
	// 1-based column index
	Column int
	// 0-based rune index
	Rune int
	// Optional file path
	Filepath string
}

// Format implements the fmt.Formatter interface.
func (l Location) Format(w fmt.State, verb rune) {
	if w.Flag('+') {
		if l.Filepath != "" {
			fmt.Fprintf(w, "%v:%v:%v[%v]", l.Filepath, l.Line, l.Column, l.Rune)
		} else {
			fmt.Fprintf(w, "%v:%v[%v]", l.Line, l.Column, l.Rune)
		}
	} else {
		if l.Filepath != "" {
			fmt.Fprintf(w, "%v:%v:%v", l.Filepath, l.Line, l.Column)
		} else {
			fmt.Fprintf(w, "%v:%v", l.Line, l.Column)
		}
	}
}

// Source describes a start and end range in the source code.
type Source struct {
	S, E Location
}

// IsValid returns true if the source is valid.
func (s Source) IsValid() bool {
	return s.S.Line != 0 && s.S.Column != 0 && s.E.Line != 0 && s.E.Column != 0
}

// Format implements the fmt.Formatter interface.
func (s Source) Format(w fmt.State, verb rune) {
	if w.Flag('+') {
		fmt.Fprint(w, "[")
		s.S.Format(w, verb)
		fmt.Fprint(w, " - ")
		s.E.Format(w, verb)
		fmt.Fprint(w, "]")
	} else {
		s.S.Format(w, verb)
	}
}

// Token describes a parsed token.
type Token struct {
	Kind   Kind
	Runes  []rune
	Source Source
}

// Format implements the fmt.Formatter interface.
func (t Token) Format(w fmt.State, verb rune) {
	fmt.Fprint(w, "[")
	t.Source.Format(w, verb)
	fmt.Fprint(w, " ")
	fmt.Fprint(w, t.Kind)
	fmt.Fprint(w, " ")
	fmt.Fprint(w, string(t.Runes))
	fmt.Fprint(w, "]")
}
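
The two Format implementations give compact and verbose renderings of the
same data, selected by the `%v` and `%+v` verbs. A short sketch (the values
here are hypothetical, not part of this change) of what each verb produces:

    package main

    import (
        "fmt"

        "dawn.googlesource.com/tint/tools/src/cmd/intrinsic-gen/tok"
    )

    func main() {
        start := tok.Location{Line: 1, Column: 2, Rune: 1, Filepath: "test.txt"}
        end := tok.Location{Line: 1, Column: 5, Rune: 4, Filepath: "test.txt"}
        src := tok.Source{S: start, E: end}

        fmt.Printf("%v\n", start)  // test.txt:1:2
        fmt.Printf("%+v\n", start) // test.txt:1:2[1]
        fmt.Printf("%v\n", src)    // test.txt:1:2
        fmt.Printf("%+v\n", src)   // [test.txt:1:2[1] - test.txt:1:5[4]]
    }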