Move the rpcgen lexer to its own file.
This commit is contained in:
parent
5a6654f07a
commit
6e924657b1
@ -12,7 +12,10 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package constants provides shared data for the libvirt package.
|
||||
// Package constants provides shared data for the libvirt package. This file
|
||||
// includes only things not generated automatically by the parser that runs on
|
||||
// libvirt's remote_protocol.x file - see constants.gen.go for the generated
|
||||
// definitions.
|
||||
package constants
|
||||
|
||||
const (
|
||||
|
@ -21,7 +21,6 @@ package lvgen
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
@ -61,6 +60,7 @@ type ConstItem struct {
|
||||
Val string
|
||||
}
|
||||
|
||||
// Generator holds all the information parsed out of the protocol file.
|
||||
type Generator struct {
|
||||
// Enums holds the list of enums found by the parser.
|
||||
Enums []ConstItem
|
||||
@ -224,289 +224,6 @@ func fixAbbrevs(s string) string {
|
||||
return s
|
||||
}
|
||||
|
||||
// TODO: Move this lexer to its own file?
|
||||
|
||||
// eof is returned by the lexer when there's no more input.
|
||||
const eof = -1
|
||||
|
||||
type item struct {
|
||||
typ int
|
||||
val string
|
||||
line, column int
|
||||
}
|
||||
|
||||
// String will display lexer items for humans to debug. There are some
|
||||
// calculations here due to the way goyacc arranges token values; see the
|
||||
// generated file y.go for an idea what's going on here, but the basic idea is
|
||||
// that the lower token type values are reserved for single-rune tokens, which
|
||||
// the lexer reports using the value of the rune itself. Everything else is
|
||||
// allocated a range of type value up above all the possible single-rune values.
|
||||
func (i item) String() string {
|
||||
tokType := i.typ
|
||||
if tokType >= yyPrivate {
|
||||
if tokType < yyPrivate+len(yyTok2) {
|
||||
tokType = yyTok2[tokType-yyPrivate]
|
||||
}
|
||||
}
|
||||
rv := fmt.Sprintf("%s %q %d:%d", yyTokname(tokType), i.val, i.line, i.column)
|
||||
return rv
|
||||
}
|
||||
|
||||
// Lexer stores the state of this lexer.
|
||||
type Lexer struct {
|
||||
input string // the string we're scanning.
|
||||
start int // start position of the item.
|
||||
pos int // current position in the input.
|
||||
line int // the current line (for error reporting).
|
||||
column int // current position within the current line.
|
||||
width int // width of the last rune scanned.
|
||||
items chan item // channel of scanned lexer items (lexemes).
|
||||
lastItem item // The last item the lexer handed the parser
|
||||
}
|
||||
|
||||
// NewLexer will return a new lexer for the passed-in reader.
|
||||
func NewLexer(rdr io.Reader) (*Lexer, error) {
|
||||
l := &Lexer{}
|
||||
|
||||
b, err := ioutil.ReadAll(rdr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
l.input = string(b)
|
||||
l.items = make(chan item)
|
||||
|
||||
return l, nil
|
||||
}
|
||||
|
||||
// Run starts the lexer, and should be called in a goroutine.
|
||||
func (l *Lexer) Run() {
|
||||
for state := lexText; state != nil; {
|
||||
state = state(l)
|
||||
}
|
||||
close(l.items)
|
||||
}
|
||||
|
||||
// emit returns a token to the parser.
|
||||
func (l *Lexer) emit(t int) {
|
||||
l.items <- item{t, l.input[l.start:l.pos], l.line, l.column}
|
||||
l.start = l.pos
|
||||
}
|
||||
|
||||
// Lex gets the next token.
|
||||
func (l *Lexer) Lex(st *yySymType) int {
|
||||
s := <-l.items
|
||||
l.lastItem = s
|
||||
st.val = s.val
|
||||
// fmt.Println("Lex returning", s)
|
||||
return int(s.typ)
|
||||
}
|
||||
|
||||
// Error is called by the parser when it finds a problem.
|
||||
func (l *Lexer) Error(s string) {
|
||||
fmt.Printf("parse error at %d:%d: %v\n", l.lastItem.line+1, l.lastItem.column+1, s)
|
||||
fmt.Printf("error at %q\n", l.lastItem.val)
|
||||
}
|
||||
|
||||
// errorf is used by the lexer to report errors. It inserts an ERROR token into
|
||||
// the items channel, and sets the state to nil, which stops the lexer's state
|
||||
// machine.
|
||||
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
|
||||
l.items <- item{ERROR, fmt.Sprintf(format, args), l.line, l.column}
|
||||
return nil
|
||||
}
|
||||
|
||||
// next returns the rune at the current location, and advances to the next rune
|
||||
// in the input.
|
||||
func (l *Lexer) next() (r rune) {
|
||||
if l.pos >= len(l.input) {
|
||||
l.width = 0
|
||||
return eof
|
||||
}
|
||||
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
|
||||
l.pos += l.width
|
||||
l.column++
|
||||
if r == '\n' {
|
||||
l.line++
|
||||
l.column = 0
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// ignore discards the current text from start to pos.
|
||||
func (l *Lexer) ignore() {
|
||||
l.start = l.pos
|
||||
}
|
||||
|
||||
// backup moves back one character, but can only be called once per next() call.
|
||||
func (l *Lexer) backup() {
|
||||
l.pos -= l.width
|
||||
if l.column > 0 {
|
||||
l.column--
|
||||
} else {
|
||||
l.line--
|
||||
}
|
||||
l.width = 0
|
||||
}
|
||||
|
||||
// peek looks ahead at the next rune in the stream without consuming it.
|
||||
func (l *Lexer) peek() rune {
|
||||
r := l.next()
|
||||
l.backup()
|
||||
return r
|
||||
}
|
||||
|
||||
// accept will advance to the next rune if it's contained in the string of valid
|
||||
// runes passed in by the caller.
|
||||
func (l *Lexer) accept(valid string) bool {
|
||||
if strings.IndexRune(valid, l.next()) >= 0 {
|
||||
return true
|
||||
}
|
||||
l.backup()
|
||||
return false
|
||||
}
|
||||
|
||||
// acceptRun advances over a number of valid runes, stopping as soon as it hits
|
||||
// one not on the list.
|
||||
func (l *Lexer) acceptRun(valid string) {
|
||||
for strings.IndexRune(valid, l.next()) >= 0 {
|
||||
}
|
||||
l.backup()
|
||||
}
|
||||
|
||||
// keyword checks whether the current lexeme is a keyword or not. If so it
|
||||
// returns the keyword's token id, otherwise it returns IDENTIFIER.
|
||||
func (l *Lexer) keyword() int {
|
||||
ident := l.input[l.start:l.pos]
|
||||
tok, ok := keywords[ident]
|
||||
if ok == true {
|
||||
return int(tok)
|
||||
}
|
||||
return IDENTIFIER
|
||||
}
|
||||
|
||||
// oneRuneToken determines whether a rune is a token. If so it returns the token
|
||||
// id and true, otherwise it returns false.
|
||||
func (l *Lexer) oneRuneToken(r rune) (int, bool) {
|
||||
if strings.IndexRune(oneRuneTokens, r) >= 0 {
|
||||
return int(r), true
|
||||
}
|
||||
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// State functions
|
||||
type stateFn func(*Lexer) stateFn
|
||||
|
||||
// lexText is the master lex routine. The lexer is started in this state.
|
||||
func lexText(l *Lexer) stateFn {
|
||||
for {
|
||||
if strings.HasPrefix(l.input[l.pos:], "/*") {
|
||||
return lexBlockComment
|
||||
}
|
||||
r := l.next()
|
||||
if r == eof {
|
||||
break
|
||||
}
|
||||
if unicode.IsSpace(r) {
|
||||
l.ignore()
|
||||
return lexText
|
||||
}
|
||||
if l.column == 1 && r == '%' {
|
||||
l.backup()
|
||||
return lexDirective
|
||||
}
|
||||
if unicode.IsLetter(r) {
|
||||
l.backup()
|
||||
return lexIdent
|
||||
}
|
||||
if unicode.IsNumber(r) || r == '-' {
|
||||
l.backup()
|
||||
return lexNumber
|
||||
}
|
||||
if t, isToken := l.oneRuneToken(r); isToken == true {
|
||||
l.emit(t)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// lexBlockComment is used when we find a comment marker '/*' in the input.
|
||||
func lexBlockComment(l *Lexer) stateFn {
|
||||
for {
|
||||
if strings.HasPrefix(l.input[l.pos:], "*/") {
|
||||
// Found the end. Advance past the '*/' and discard the comment body.
|
||||
l.next()
|
||||
l.next()
|
||||
l.ignore()
|
||||
return lexText
|
||||
}
|
||||
if l.next() == eof {
|
||||
return l.errorf("unterminated block comment")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// lexIdent handles identifiers.
|
||||
func lexIdent(l *Lexer) stateFn {
|
||||
for {
|
||||
r := l.next()
|
||||
if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
|
||||
continue
|
||||
}
|
||||
l.backup()
|
||||
break
|
||||
}
|
||||
// We may have a keyword, so check for that before emitting.
|
||||
l.emit(l.keyword())
|
||||
|
||||
return lexText
|
||||
}
|
||||
|
||||
// lexNumber handles decimal and hexadecimal numbers. Decimal numbers may begin
|
||||
// with a '-'; hex numbers begin with '0x' and do not accept leading '-'.
|
||||
func lexNumber(l *Lexer) stateFn {
|
||||
// Leading '-' is ok
|
||||
digits := "0123456789"
|
||||
neg := l.accept("-")
|
||||
if !neg {
|
||||
// allow '0x' for hex numbers, as long as there's not a leading '-'.
|
||||
r := l.peek()
|
||||
if r == '0' {
|
||||
l.next()
|
||||
if l.accept("x") {
|
||||
digits = "0123456789ABCDEFabcdef"
|
||||
}
|
||||
}
|
||||
}
|
||||
// followed by any number of digits
|
||||
l.acceptRun(digits)
|
||||
r := l.peek()
|
||||
if unicode.IsLetter(r) {
|
||||
l.next()
|
||||
return l.errorf("invalid number: %q", l.input[l.start:l.pos])
|
||||
}
|
||||
l.emit(CONSTANT)
|
||||
return lexText
|
||||
}
|
||||
|
||||
// lexDirective handles lines beginning with '%'. These are used to emit C code
|
||||
// directly to the output file. For now we're ignoring them, but some of the
|
||||
// constants in the protocol file do depend on values from #included header
|
||||
// files, so that may need to change.
|
||||
func lexDirective(l *Lexer) stateFn {
|
||||
for {
|
||||
r := l.next()
|
||||
if r == '\n' {
|
||||
l.ignore()
|
||||
return lexText
|
||||
}
|
||||
if r == eof {
|
||||
return l.errorf("unterminated directive")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
// Routines called by the parser's actions.
|
||||
//---------------------------------------------------------------------------
|
||||
|
305
internal/lvgen/lvlexer.go
Normal file
305
internal/lvgen/lvlexer.go
Normal file
@ -0,0 +1,305 @@
|
||||
// Copyright 2017 The go-libvirt Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package lvgen
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// eof is returned by the lexer when there's no more input.
|
||||
const eof = -1
|
||||
|
||||
type item struct {
|
||||
typ int
|
||||
val string
|
||||
line, column int
|
||||
}
|
||||
|
||||
// String will display lexer items for humans to debug. There are some
|
||||
// calculations here due to the way goyacc arranges token values; see the
|
||||
// generated file y.go for an idea what's going on here, but the basic idea is
|
||||
// that the lower token type values are reserved for single-rune tokens, which
|
||||
// the lexer reports using the value of the rune itself. Everything else is
|
||||
// allocated a range of type value up above all the possible single-rune values.
|
||||
func (i item) String() string {
|
||||
tokType := i.typ
|
||||
if tokType >= yyPrivate {
|
||||
if tokType < yyPrivate+len(yyTok2) {
|
||||
tokType = yyTok2[tokType-yyPrivate]
|
||||
}
|
||||
}
|
||||
rv := fmt.Sprintf("%s %q %d:%d", yyTokname(tokType), i.val, i.line, i.column)
|
||||
return rv
|
||||
}
|
||||
|
||||
// Lexer stores the state of this lexer.
|
||||
type Lexer struct {
|
||||
input string // the string we're scanning.
|
||||
start int // start position of the item.
|
||||
pos int // current position in the input.
|
||||
line int // the current line (for error reporting).
|
||||
column int // current position within the current line.
|
||||
width int // width of the last rune scanned.
|
||||
items chan item // channel of scanned lexer items (lexemes).
|
||||
lastItem item // The last item the lexer handed the parser
|
||||
}
|
||||
|
||||
// NewLexer will return a new lexer for the passed-in reader.
|
||||
func NewLexer(rdr io.Reader) (*Lexer, error) {
|
||||
l := &Lexer{}
|
||||
|
||||
b, err := ioutil.ReadAll(rdr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
l.input = string(b)
|
||||
l.items = make(chan item)
|
||||
|
||||
return l, nil
|
||||
}
|
||||
|
||||
// Run starts the lexer, and should be called in a goroutine.
|
||||
func (l *Lexer) Run() {
|
||||
for state := lexText; state != nil; {
|
||||
state = state(l)
|
||||
}
|
||||
close(l.items)
|
||||
}
|
||||
|
||||
// emit returns a token to the parser.
|
||||
func (l *Lexer) emit(t int) {
|
||||
l.items <- item{t, l.input[l.start:l.pos], l.line, l.column}
|
||||
l.start = l.pos
|
||||
}
|
||||
|
||||
// Lex gets the next token.
|
||||
func (l *Lexer) Lex(st *yySymType) int {
|
||||
s := <-l.items
|
||||
l.lastItem = s
|
||||
st.val = s.val
|
||||
// fmt.Println("Lex returning", s)
|
||||
return int(s.typ)
|
||||
}
|
||||
|
||||
// Error is called by the parser when it finds a problem.
|
||||
func (l *Lexer) Error(s string) {
|
||||
fmt.Printf("parse error at %d:%d: %v\n", l.lastItem.line+1, l.lastItem.column+1, s)
|
||||
fmt.Printf("error at %q\n", l.lastItem.val)
|
||||
}
|
||||
|
||||
// errorf is used by the lexer to report errors. It inserts an ERROR token into
|
||||
// the items channel, and sets the state to nil, which stops the lexer's state
|
||||
// machine.
|
||||
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
|
||||
l.items <- item{ERROR, fmt.Sprintf(format, args), l.line, l.column}
|
||||
return nil
|
||||
}
|
||||
|
||||
// next returns the rune at the current location, and advances to the next rune
|
||||
// in the input.
|
||||
func (l *Lexer) next() (r rune) {
|
||||
if l.pos >= len(l.input) {
|
||||
l.width = 0
|
||||
return eof
|
||||
}
|
||||
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
|
||||
l.pos += l.width
|
||||
l.column++
|
||||
if r == '\n' {
|
||||
l.line++
|
||||
l.column = 0
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// ignore discards the current text from start to pos.
|
||||
func (l *Lexer) ignore() {
|
||||
l.start = l.pos
|
||||
}
|
||||
|
||||
// backup moves back one character, but can only be called once per next() call.
|
||||
func (l *Lexer) backup() {
|
||||
l.pos -= l.width
|
||||
if l.column > 0 {
|
||||
l.column--
|
||||
} else {
|
||||
l.line--
|
||||
}
|
||||
l.width = 0
|
||||
}
|
||||
|
||||
// peek looks ahead at the next rune in the stream without consuming it.
|
||||
func (l *Lexer) peek() rune {
|
||||
r := l.next()
|
||||
l.backup()
|
||||
return r
|
||||
}
|
||||
|
||||
// accept will advance to the next rune if it's contained in the string of valid
|
||||
// runes passed in by the caller.
|
||||
func (l *Lexer) accept(valid string) bool {
|
||||
if strings.IndexRune(valid, l.next()) >= 0 {
|
||||
return true
|
||||
}
|
||||
l.backup()
|
||||
return false
|
||||
}
|
||||
|
||||
// acceptRun advances over a number of valid runes, stopping as soon as it hits
|
||||
// one not on the list.
|
||||
func (l *Lexer) acceptRun(valid string) {
|
||||
for strings.IndexRune(valid, l.next()) >= 0 {
|
||||
}
|
||||
l.backup()
|
||||
}
|
||||
|
||||
// keyword checks whether the current lexeme is a keyword or not. If so it
|
||||
// returns the keyword's token id, otherwise it returns IDENTIFIER.
|
||||
func (l *Lexer) keyword() int {
|
||||
ident := l.input[l.start:l.pos]
|
||||
tok, ok := keywords[ident]
|
||||
if ok == true {
|
||||
return int(tok)
|
||||
}
|
||||
return IDENTIFIER
|
||||
}
|
||||
|
||||
// oneRuneToken determines whether a rune is a token. If so it returns the token
|
||||
// id and true, otherwise it returns false.
|
||||
func (l *Lexer) oneRuneToken(r rune) (int, bool) {
|
||||
if strings.IndexRune(oneRuneTokens, r) >= 0 {
|
||||
return int(r), true
|
||||
}
|
||||
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// State functions
|
||||
type stateFn func(*Lexer) stateFn
|
||||
|
||||
// lexText is the master lex routine. The lexer is started in this state.
|
||||
func lexText(l *Lexer) stateFn {
|
||||
for {
|
||||
if strings.HasPrefix(l.input[l.pos:], "/*") {
|
||||
return lexBlockComment
|
||||
}
|
||||
r := l.next()
|
||||
if r == eof {
|
||||
break
|
||||
}
|
||||
if unicode.IsSpace(r) {
|
||||
l.ignore()
|
||||
return lexText
|
||||
}
|
||||
if l.column == 1 && r == '%' {
|
||||
l.backup()
|
||||
return lexDirective
|
||||
}
|
||||
if unicode.IsLetter(r) {
|
||||
l.backup()
|
||||
return lexIdent
|
||||
}
|
||||
if unicode.IsNumber(r) || r == '-' {
|
||||
l.backup()
|
||||
return lexNumber
|
||||
}
|
||||
if t, isToken := l.oneRuneToken(r); isToken == true {
|
||||
l.emit(t)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// lexBlockComment is used when we find a comment marker '/*' in the input.
|
||||
func lexBlockComment(l *Lexer) stateFn {
|
||||
for {
|
||||
if strings.HasPrefix(l.input[l.pos:], "*/") {
|
||||
// Found the end. Advance past the '*/' and discard the comment body.
|
||||
l.next()
|
||||
l.next()
|
||||
l.ignore()
|
||||
return lexText
|
||||
}
|
||||
if l.next() == eof {
|
||||
return l.errorf("unterminated block comment")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// lexIdent handles identifiers.
|
||||
func lexIdent(l *Lexer) stateFn {
|
||||
for {
|
||||
r := l.next()
|
||||
if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
|
||||
continue
|
||||
}
|
||||
l.backup()
|
||||
break
|
||||
}
|
||||
// We may have a keyword, so check for that before emitting.
|
||||
l.emit(l.keyword())
|
||||
|
||||
return lexText
|
||||
}
|
||||
|
||||
// lexNumber handles decimal and hexadecimal numbers. Decimal numbers may begin
|
||||
// with a '-'; hex numbers begin with '0x' and do not accept leading '-'.
|
||||
func lexNumber(l *Lexer) stateFn {
|
||||
// Leading '-' is ok
|
||||
digits := "0123456789"
|
||||
neg := l.accept("-")
|
||||
if !neg {
|
||||
// allow '0x' for hex numbers, as long as there's not a leading '-'.
|
||||
r := l.peek()
|
||||
if r == '0' {
|
||||
l.next()
|
||||
if l.accept("x") {
|
||||
digits = "0123456789ABCDEFabcdef"
|
||||
}
|
||||
}
|
||||
}
|
||||
// followed by any number of digits
|
||||
l.acceptRun(digits)
|
||||
r := l.peek()
|
||||
if unicode.IsLetter(r) {
|
||||
l.next()
|
||||
return l.errorf("invalid number: %q", l.input[l.start:l.pos])
|
||||
}
|
||||
l.emit(CONSTANT)
|
||||
return lexText
|
||||
}
|
||||
|
||||
// lexDirective handles lines beginning with '%'. These are used to emit C code
|
||||
// directly to the output file. For now we're ignoring them, but some of the
|
||||
// constants in the protocol file do depend on values from #included header
|
||||
// files, so that may need to change.
|
||||
func lexDirective(l *Lexer) stateFn {
|
||||
for {
|
||||
r := l.next()
|
||||
if r == '\n' {
|
||||
l.ignore()
|
||||
return lexText
|
||||
}
|
||||
if r == eof {
|
||||
return l.errorf("unterminated directive")
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user