Add cmd/intrinsic-gen lexer and tokens
Ground work for the new intrinsic definition parser. Bug: tint:832 Change-Id: I341ae11e36ef7af96ce7d01609a96e2c02425e87 Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/52500 Kokoro: Kokoro <noreply+kokoro@google.com> Reviewed-by: David Neto <dneto@google.com>
This commit is contained in:
parent
d78f55390d
commit
c95e05784d
|
@ -0,0 +1,200 @@
|
||||||
|
// Copyright 2021 The Tint Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package lexer provides a basic lexer for the Tint intrinsic definition
|
||||||
|
// language
|
||||||
|
package lexer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"unicode"
|
||||||
|
|
||||||
|
"dawn.googlesource.com/tint/tools/src/cmd/intrinsic-gen/tok"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Lex produces a list of tokens for the given source code
|
||||||
|
func Lex(src []rune, filepath string) ([]tok.Token, error) {
|
||||||
|
l := lexer{
|
||||||
|
tok.Location{Line: 1, Column: 1, Rune: 0, Filepath: filepath},
|
||||||
|
src,
|
||||||
|
[]tok.Token{},
|
||||||
|
}
|
||||||
|
if err := l.lex(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return l.tokens, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// lexer holds the state used while scanning the source into tokens.
type lexer struct {
	loc    tok.Location // location of the next rune to be consumed
	runes  []rune       // the source runes not yet consumed
	tokens []tok.Token  // tokens produced so far
}
|
||||||
|
|
||||||
|
// lex() lexes the source, populating l.tokens
|
||||||
|
func (l *lexer) lex() error {
|
||||||
|
for {
|
||||||
|
switch l.peek(0) {
|
||||||
|
case 0:
|
||||||
|
return nil
|
||||||
|
case ' ', '\t':
|
||||||
|
l.next()
|
||||||
|
case '\n':
|
||||||
|
l.next()
|
||||||
|
case '<':
|
||||||
|
l.tok(1, tok.Lt)
|
||||||
|
case '>':
|
||||||
|
l.tok(1, tok.Gt)
|
||||||
|
case '(':
|
||||||
|
l.tok(1, tok.Lparen)
|
||||||
|
case ')':
|
||||||
|
l.tok(1, tok.Rparen)
|
||||||
|
case '{':
|
||||||
|
l.tok(1, tok.Lbrace)
|
||||||
|
case '}':
|
||||||
|
l.tok(1, tok.Rbrace)
|
||||||
|
case ':':
|
||||||
|
l.tok(1, tok.Colon)
|
||||||
|
case ',':
|
||||||
|
l.tok(1, tok.Comma)
|
||||||
|
case '|':
|
||||||
|
l.tok(1, tok.Or)
|
||||||
|
case '"':
|
||||||
|
start := l.loc
|
||||||
|
l.next() // Skip opening quote
|
||||||
|
n := l.count(toFirst('\n', '"'))
|
||||||
|
if l.peek(n) != '"' {
|
||||||
|
return fmt.Errorf("%v unterminated string", start)
|
||||||
|
}
|
||||||
|
l.tok(n, tok.String)
|
||||||
|
l.next() // Skip closing quote
|
||||||
|
default:
|
||||||
|
switch {
|
||||||
|
case l.peek(1) == '/':
|
||||||
|
l.skip(l.count(toFirst('\n')))
|
||||||
|
l.next() // Consume newline
|
||||||
|
case l.match("[[", tok.Ldeco):
|
||||||
|
case l.match("]]", tok.Rdeco):
|
||||||
|
case l.match("->", tok.Arrow):
|
||||||
|
case l.match("fn", tok.Function):
|
||||||
|
case l.match("enum", tok.Enum):
|
||||||
|
case l.match("type", tok.Type):
|
||||||
|
case l.match("match", tok.Match):
|
||||||
|
case unicode.IsLetter(l.peek(0)):
|
||||||
|
l.tok(l.count(alphaNumericOrUnderscore), tok.Identifier)
|
||||||
|
case unicode.IsNumber(l.peek(0)):
|
||||||
|
l.tok(l.count(unicode.IsNumber), tok.Integer)
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("%v: unexpected '%v'", l.loc, string(l.runes[0]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// next() consumes and returns the next rune in the source, or 0 if reached EOF
|
||||||
|
func (l *lexer) next() rune {
|
||||||
|
if len(l.runes) > 0 {
|
||||||
|
r := l.runes[0]
|
||||||
|
l.runes = l.runes[1:]
|
||||||
|
l.loc.Rune++
|
||||||
|
if r == '\n' {
|
||||||
|
l.loc.Line++
|
||||||
|
l.loc.Column = 1
|
||||||
|
} else {
|
||||||
|
l.loc.Column++
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// skip() consumes the next `n` runes in the source
|
||||||
|
func (l *lexer) skip(n int) {
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
l.next()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// peek() returns the rune `i` runes ahead of the current position
|
||||||
|
func (l *lexer) peek(i int) rune {
|
||||||
|
if i >= len(l.runes) {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return l.runes[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
// predicate is a function that can be passed to count().
// It reports whether the rune `r` satisfies some condition.
type predicate func(r rune) bool
|
||||||
|
|
||||||
|
// count() returns the number of sequential runes from the current position that
|
||||||
|
// match the predicate `p`
|
||||||
|
func (l *lexer) count(p predicate) int {
|
||||||
|
for i := 0; i < len(l.runes); i++ {
|
||||||
|
if !p(l.peek(i)) {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return len(l.runes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// tok() appends a new token of kind `k` using the next `n` runes.
|
||||||
|
// The next `n` runes are consumed by tok().
|
||||||
|
func (l *lexer) tok(n int, k tok.Kind) {
|
||||||
|
start := l.loc
|
||||||
|
runes := l.runes[:n]
|
||||||
|
l.skip(n)
|
||||||
|
end := l.loc
|
||||||
|
|
||||||
|
src := tok.Source{S: start, E: end}
|
||||||
|
l.tokens = append(l.tokens, tok.Token{Kind: k, Source: src, Runes: runes})
|
||||||
|
}
|
||||||
|
|
||||||
|
// match() checks whether the next runes are equal to `s`. If they are, then
|
||||||
|
// these runes are used to append a new token of kind `k`, and match() returns
|
||||||
|
// true. If the next runes are not equal to `s` then false is returned, and no
|
||||||
|
// runes are consumed.
|
||||||
|
func (l *lexer) match(s string, kind tok.Kind) bool {
|
||||||
|
runes := []rune(s)
|
||||||
|
if len(l.runes) < len(runes) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i, r := range runes {
|
||||||
|
if l.runes[i] != r {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
l.tok(len(runes), kind)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// toFirst() returns a predicate that returns true if the rune is not in `runes`
|
||||||
|
// toFirst() is intended to be used with count(), so `count(toFirst('x'))` will
|
||||||
|
// count up to, but not including the number of consecutive runes that are not
|
||||||
|
// 'x'.
|
||||||
|
func toFirst(runes ...rune) predicate {
|
||||||
|
return func(r rune) bool {
|
||||||
|
for _, t := range runes {
|
||||||
|
if t == r {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// alphaNumericOrUnderscore() returns true if the rune `r` is a number, letter
// or underscore.
func alphaNumericOrUnderscore(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r), unicode.IsNumber(r):
		return true
	default:
		return false
	}
}
|
|
@ -0,0 +1,144 @@
|
||||||
|
// Copyright 2021 The Tint Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package lexer_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"dawn.googlesource.com/tint/tools/src/cmd/intrinsic-gen/lexer"
|
||||||
|
"dawn.googlesource.com/tint/tools/src/cmd/intrinsic-gen/tok"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestLexTokens checks that each source snippet lexes to exactly one token
// with the expected kind, runes and source range.
func TestLexTokens(t *testing.T) {
	type test struct {
		src    string    // source fed to the lexer
		expect tok.Token // the single token expected back
	}

	filepath := "test.txt"
	// loc builds an expected Location for `filepath`.
	loc := func(l, c, r int) tok.Location {
		return tok.Location{Line: l, Column: c, Rune: r, Filepath: filepath}
	}

	for _, test := range []test{
		{"ident", tok.Token{Kind: tok.Identifier, Runes: []rune("ident"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 6, 5),
		}}},
		{"ident_123", tok.Token{Kind: tok.Identifier, Runes: []rune("ident_123"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 10, 9),
		}}},
		{"123456789", tok.Token{Kind: tok.Integer, Runes: []rune("123456789"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 10, 9),
		}}},
		{"match", tok.Token{Kind: tok.Match, Runes: []rune("match"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 6, 5),
		}}},
		{"fn", tok.Token{Kind: tok.Function, Runes: []rune("fn"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 3, 2),
		}}},
		{"type", tok.Token{Kind: tok.Type, Runes: []rune("type"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 5, 4),
		}}},
		{"enum", tok.Token{Kind: tok.Enum, Runes: []rune("enum"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 5, 4),
		}}},
		{":", tok.Token{Kind: tok.Colon, Runes: []rune(":"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{",", tok.Token{Kind: tok.Comma, Runes: []rune(","), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{"<", tok.Token{Kind: tok.Lt, Runes: []rune("<"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{">", tok.Token{Kind: tok.Gt, Runes: []rune(">"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{"{", tok.Token{Kind: tok.Lbrace, Runes: []rune("{"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{"}", tok.Token{Kind: tok.Rbrace, Runes: []rune("}"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{"[[", tok.Token{Kind: tok.Ldeco, Runes: []rune("[["), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 3, 2),
		}}},
		{"]]", tok.Token{Kind: tok.Rdeco, Runes: []rune("]]"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 3, 2),
		}}},
		{"(", tok.Token{Kind: tok.Lparen, Runes: []rune("("), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{")", tok.Token{Kind: tok.Rparen, Runes: []rune(")"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{"|", tok.Token{Kind: tok.Or, Runes: []rune("|"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		{"->", tok.Token{Kind: tok.Arrow, Runes: []rune("->"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 3, 2),
		}}},
		// A trailing line comment is consumed without producing a token.
		{"x // y ", tok.Token{Kind: tok.Identifier, Runes: []rune("x"), Source: tok.Source{
			S: loc(1, 1, 0), E: loc(1, 2, 1),
		}}},
		// String tokens exclude the surrounding quotes.
		{`"abc"`, tok.Token{Kind: tok.String, Runes: []rune("abc"), Source: tok.Source{
			S: loc(1, 2, 1), E: loc(1, 5, 4),
		}}},
		// Multi-line input: leading newlines, whitespace and comments are
		// skipped; the identifier's location reflects line 3, column 4.
		// NOTE(review): the indentation inside this raw string was lost in
		// transcription and is reconstructed (3 spaces) from the expected
		// loc(3, 4, 10) — confirm against the original source.
		{`
   //
   ident

`, tok.Token{Kind: tok.Identifier, Runes: []rune("ident"), Source: tok.Source{
			S: loc(3, 4, 10), E: loc(3, 9, 15),
		}}},
	} {
		got, err := lexer.Lex([]rune(test.src), filepath)
		name := fmt.Sprintf(`Lex("%v")`, test.src)
		switch {
		case err != nil:
			t.Errorf("%v returned error: %v", name, err)
		case len(got) != 1:
			t.Errorf("%v returned %d tokens: %v", name, len(got), got)
		case got[0].Kind != test.expect.Kind:
			t.Errorf(`%v returned unexpected token kind: got "%+v", expected "%+v"`, name, got[0], test.expect)
		case string(got[0].Runes) != string(test.expect.Runes):
			t.Errorf(`%v returned unexpected token runes: got "%+v", expected "%+v"`, name, string(got[0].Runes), string(test.expect.Runes))
		case got[0].Source != test.expect.Source:
			t.Errorf(`%v returned unexpected token source: got %+v, expected %+v`, name, got[0].Source, test.expect.Source)
		}
	}
}
|
||||||
|
|
||||||
|
func TestErrors(t *testing.T) {
|
||||||
|
type test struct {
|
||||||
|
src string
|
||||||
|
expect string
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range []test{
|
||||||
|
{" \"abc", "test.txt:1:2 unterminated string"},
|
||||||
|
{" \"abc\n", "test.txt:1:2 unterminated string"},
|
||||||
|
{"*", "test.txt:1:1: unexpected '*'"},
|
||||||
|
} {
|
||||||
|
got, err := lexer.Lex([]rune(test.src), "test.txt")
|
||||||
|
if gotErr := err.Error(); test.expect != gotErr {
|
||||||
|
t.Errorf(`Lex() returned error "%+v", expected error "%+v"`, gotErr, test.expect)
|
||||||
|
}
|
||||||
|
if got != nil {
|
||||||
|
t.Errorf("Lex() returned non-nil for error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,119 @@
|
||||||
|
// Copyright 2021 The Tint Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package tok defines tokens that are produced by the Tint intrinsic definition
|
||||||
|
// lexer
|
||||||
|
package tok
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// Kind is an enumerator of token kinds.
// The string value is the human-readable name of the kind.
type Kind string
|
||||||
|
|
||||||
|
// Token enumerator types
|
||||||
|
const (
|
||||||
|
InvalidToken Kind = "<invalid>"
|
||||||
|
Identifier Kind = "ident"
|
||||||
|
Integer Kind = "integer"
|
||||||
|
String Kind = "string"
|
||||||
|
Match Kind = "match"
|
||||||
|
Function Kind = "fn"
|
||||||
|
Type Kind = "type"
|
||||||
|
Enum Kind = "enum"
|
||||||
|
Colon Kind = ":"
|
||||||
|
Comma Kind = ","
|
||||||
|
Lt Kind = "<"
|
||||||
|
Gt Kind = ">"
|
||||||
|
Lbrace Kind = "{"
|
||||||
|
Rbrace Kind = "}"
|
||||||
|
Ldeco Kind = "[["
|
||||||
|
Rdeco Kind = "]]"
|
||||||
|
Lparen Kind = "("
|
||||||
|
Rparen Kind = ")"
|
||||||
|
Or Kind = "|"
|
||||||
|
Arrow Kind = "->"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Invalid represents an invalid token (zero Runes, zero Source).
var Invalid = Token{Kind: InvalidToken}
|
||||||
|
|
||||||
|
// Location describes a rune location in the source code
|
||||||
|
type Location struct {
|
||||||
|
// 1-based line index
|
||||||
|
Line int
|
||||||
|
// 1-based column index
|
||||||
|
Column int
|
||||||
|
// 0-based rune index
|
||||||
|
Rune int
|
||||||
|
// Optional file path
|
||||||
|
Filepath string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format implements the fmt.Formatter interface
|
||||||
|
func (l Location) Format(w fmt.State, verb rune) {
|
||||||
|
if w.Flag('+') {
|
||||||
|
if l.Filepath != "" {
|
||||||
|
fmt.Fprintf(w, "%v:%v:%v[%v]", l.Filepath, l.Line, l.Column, l.Rune)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(w, "%v:%v[%v]", l.Line, l.Column, l.Rune)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if l.Filepath != "" {
|
||||||
|
fmt.Fprintf(w, "%v:%v:%v", l.Filepath, l.Line, l.Column)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(w, "%v:%v", l.Line, l.Column)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Source describes a start and end range in the source code
type Source struct {
	// S is the start location (inclusive), E the end location (exclusive).
	S, E Location
}
|
||||||
|
|
||||||
|
// IsValid returns true if the source is valid
|
||||||
|
func (s Source) IsValid() bool {
|
||||||
|
return s.S.Line != 0 && s.S.Column != 0 && s.E.Line != 0 && s.E.Column != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format implements the fmt.Formatter interface
|
||||||
|
func (s Source) Format(w fmt.State, verb rune) {
|
||||||
|
if w.Flag('+') {
|
||||||
|
fmt.Fprint(w, "[")
|
||||||
|
s.S.Format(w, verb)
|
||||||
|
fmt.Fprint(w, " - ")
|
||||||
|
s.E.Format(w, verb)
|
||||||
|
fmt.Fprint(w, "]")
|
||||||
|
} else {
|
||||||
|
s.S.Format(w, verb)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Token describes a parsed token
type Token struct {
	Kind   Kind   // the kind of the token
	Runes  []rune // the token's content (quotes excluded for strings)
	Source Source // where the token was found in the source
}
|
||||||
|
|
||||||
|
// Format implements the fmt.Formatter interface
|
||||||
|
func (t Token) Format(w fmt.State, verb rune) {
|
||||||
|
fmt.Fprint(w, "[")
|
||||||
|
t.Source.Format(w, verb)
|
||||||
|
fmt.Fprint(w, " ")
|
||||||
|
fmt.Fprint(w, t.Kind)
|
||||||
|
fmt.Fprint(w, " ")
|
||||||
|
fmt.Fprint(w, string(t.Runes))
|
||||||
|
fmt.Fprint(w, "]")
|
||||||
|
}
|
Loading…
Reference in New Issue