5a6654f07a
- Add a yacc-based parser and a hand-written lexer to read the remote_protocol.x file from libvirt's sources. - Use the new parser to generate the constants used to communicate with libvirt.
565 lines
14 KiB
Go
565 lines
14 KiB
Go
// Copyright 2017 The go-libvirt Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
package lvgen
|
|
|
|
// The libvirt API is divided into several categories. (Gallia est omnis divisa
// in partes tres.) The generator will output code for each category in a
// package underneath the go-libvirt directory.
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"text/template"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
var keywords = map[string]int{
|
|
"hyper": HYPER,
|
|
"int": INT,
|
|
"short": SHORT,
|
|
"char": CHAR,
|
|
"bool": BOOL,
|
|
"case": CASE,
|
|
"const": CONST,
|
|
"default": DEFAULT,
|
|
"double": DOUBLE,
|
|
"enum": ENUM,
|
|
"float": FLOAT,
|
|
"opaque": OPAQUE,
|
|
"string": STRING,
|
|
"struct": STRUCT,
|
|
"switch": SWITCH,
|
|
"typedef": TYPEDEF,
|
|
"union": UNION,
|
|
"unsigned": UNSIGNED,
|
|
"void": VOID,
|
|
"program": PROGRAM,
|
|
"version": VERSION,
|
|
}
|
|
|
|
// ConstItem pairs the symbol of a const with its value as reported by the
// parser. Enum members are recorded with the same struct.
type ConstItem struct {
	Name string // symbol of the constant or enum member
	Val  string // value of the symbol, kept as text
}
|
|
|
|
type Generator struct {
|
|
// Enums holds the list of enums found by the parser.
|
|
Enums []ConstItem
|
|
// Consts holds all the const items found by the parser.
|
|
Consts []ConstItem
|
|
}
|
|
|
|
// Gen accumulates items as the parser runs, and is then used to produce the
|
|
// output.
|
|
var Gen Generator
|
|
|
|
// CurrentEnumVal tracks the value most recently assigned to an enum member. A
// member declared without an explicit value receives this value plus one.
var CurrentEnumVal int64
|
|
|
|
// oneRuneTokens holds every rune that forms a complete token by itself. The
// lexer hands such a token to the parser using the rune's own integer value as
// the token id.
var oneRuneTokens = "{}[]<>(),=;:*"
|
|
|
|
// Generate will output go bindings for libvirt. The lvPath parameter should be
|
|
// the path to the root of the libvirt source directory to use for the
|
|
// generation.
|
|
func Generate(proto io.Reader) error {
|
|
lexer, err := NewLexer(proto)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
go lexer.Run()
|
|
parser := yyNewParser()
|
|
yyErrorVerbose = true
|
|
// Turn this on if you're debugging.
|
|
// yyDebug = 3
|
|
rv := parser.Parse(lexer)
|
|
if rv != 0 {
|
|
return fmt.Errorf("failed to parse libvirt protocol: %v", rv)
|
|
}
|
|
|
|
// Generate and write the output.
|
|
wr, err := os.Create("../constants/constants.gen.go")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer wr.Close()
|
|
|
|
err = genGo(wr)
|
|
|
|
return err
|
|
}
|
|
|
|
// genGo renders the accumulated enums and consts in Gen as a Go source file and
// writes it to wr. Before rendering, every name stored in Gen is rewritten in
// place by constNameTransform, so Gen holds the Go-style names afterwards.
// The error from executing the template is returned as is.
func genGo(wr io.Writer) error {
	// TODO: Move this someplace nice.
	const consttempl = `/*
* This file generated by internal/lvgen/generate.go. DO NOT EDIT BY HAND!
*
* To regenerate, run 'go generate' in internal/lvgen.
*/
package constants

// libvirt procedure identifiers and other enums
//
// These are libvirt procedure numbers which correspond to each respective
// API call between remote_internal driver and libvirtd. Each procedure is
// identified by a unique number which *may change in any future libvirt
// update*.
//
// Examples:
// REMOTE_PROC_CONNECT_OPEN = 1
// REMOTE_PROC_DOMAIN_DEFINE_XML = 11
// REMOTE_PROC_DOMAIN_MIGRATE_SET_MAX_SPEED = 207,
const (
// From enums:
{{range .Enums}}{{.Name}} = {{.Val}}
{{end}}

// From consts:
{{range .Consts}}{{.Name}} = {{.Val}}
{{end}}
)
`
	// Both enums and consts of the protocol definition end up as Go consts.
	// Their names lose the "REMOTE_" prefix and are camel-cased first.
	for i := range Gen.Enums {
		entry := &Gen.Enums[i]
		entry.Name = constNameTransform(entry.Name)
	}
	for i := range Gen.Consts {
		entry := &Gen.Consts[i]
		entry.Name = constNameTransform(entry.Name)
	}

	tmpl := template.Must(template.New("consts").Parse(consttempl))
	return tmpl.Execute(wr, Gen)
}
|
|
|
|
// constNameTransform changes an upcased, snake-style name like
|
|
// REMOTE_PROTOCOL_VERSION to a comfortable Go name like ProtocolVersion. It
|
|
// also tries to upcase abbreviations so a name like DOMAIN_GET_XML becomes
|
|
// DomainGetXML, not DomainGetXml.
|
|
func constNameTransform(name string) string {
|
|
nn := fromSnakeToCamel(strings.TrimPrefix(name, "REMOTE_"))
|
|
nn = fixAbbrevs(nn)
|
|
return nn
|
|
}
|
|
|
|
// fromSnakeToCamel converts a snake-cased string to a camel-cased one. The
// first rune and every rune directly after an underscore are upper-cased, all
// other runes are lower-cased, and the underscores are dropped.
//
// ex: "PROC_DOMAIN_GET_METADATA" -> "ProcDomainGetMetadata"
func fromSnakeToCamel(s string) string {
	out := make([]rune, 0, len(s))
	// The very first rune is treated as if it followed an underscore.
	upperNext := true

	for _, r := range s {
		switch {
		case r == '_':
			upperNext = true
		case upperNext:
			out = append(out, unicode.ToUpper(r))
			upperNext = false
		default:
			out = append(out, unicode.ToLower(r))
		}
	}

	return string(out)
}
|
|
|
|
// abbrevs is a list of abbreviations which should be all upper-case in a name.
// (This is really just to keep the go linters happy and to produce names that
// are intuitive to a go developer.)
var abbrevs = []string{"Xml", "Io", "Uuid", "Cpu", "Id", "Ip"}

// fixAbbrevs returns s with every occurrence of an entry of abbrevs
// upper-cased, e.g. "DomainGetXmlDesc" becomes "DomainGetXMLDesc". An
// occurrence directly followed by a lower-case letter is left alone, because
// it is then part of a longer word ("Ident" does not become "IDent").
func fixAbbrevs(s string) string {
	for _, a := range abbrevs {
		upper := strings.ToUpper(a)
		for loc := 0; ; {
			// Index reports an offset relative to s[loc:], so it has to be
			// added to loc to get a position within s.
			ix := strings.Index(s[loc:], a)
			if ix == -1 {
				break
			}
			loc += ix
			end := loc + len(a)

			// An abbreviation at the very end of s counts as a whole word, so
			// start from a rune that is not lower-case.
			r := 'A'
			if end < len(s) {
				r, _ = utf8.DecodeRuneInString(s[end:])
			}
			if !unicode.IsLower(r) {
				s = s[:loc] + upper + s[end:]
			}
			// Resume the search after this occurrence.
			loc = end
		}
	}
	return s
}
|
|
|
|
// TODO: Move this lexer to its own file?
|
|
|
|
// eof is the sentinel rune the lexer's next method reports once the input is
// exhausted.
const eof = -1
|
|
|
|
// item is one lexeme passed from the lexer to the parser.
type item struct {
	typ    int    // token id
	val    string // text of the lexeme, or the message of an ERROR item
	line   int    // lexer line when the item was emitted
	column int    // lexer column when the item was emitted
}
|
|
|
|
// String will display lexer items for humans to debug. There are some
|
|
// calculations here due to the way goyacc arranges token values; see the
|
|
// generated file y.go for an idea what's going on here, but the basic idea is
|
|
// that the lower token type values are reserved for single-rune tokens, which
|
|
// the lexer reports using the value of the rune itself. Everything else is
|
|
// allocated a range of type value up above all the possible single-rune values.
|
|
func (i item) String() string {
|
|
tokType := i.typ
|
|
if tokType >= yyPrivate {
|
|
if tokType < yyPrivate+len(yyTok2) {
|
|
tokType = yyTok2[tokType-yyPrivate]
|
|
}
|
|
}
|
|
rv := fmt.Sprintf("%s %q %d:%d", yyTokname(tokType), i.val, i.line, i.column)
|
|
return rv
|
|
}
|
|
|
|
// Lexer stores the state of this lexer.
|
|
type Lexer struct {
|
|
input string // the string we're scanning.
|
|
start int // start position of the item.
|
|
pos int // current position in the input.
|
|
line int // the current line (for error reporting).
|
|
column int // current position within the current line.
|
|
width int // width of the last rune scanned.
|
|
items chan item // channel of scanned lexer items (lexemes).
|
|
lastItem item // The last item the lexer handed the parser
|
|
}
|
|
|
|
// NewLexer will return a new lexer for the passed-in reader.
|
|
func NewLexer(rdr io.Reader) (*Lexer, error) {
|
|
l := &Lexer{}
|
|
|
|
b, err := ioutil.ReadAll(rdr)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
l.input = string(b)
|
|
l.items = make(chan item)
|
|
|
|
return l, nil
|
|
}
|
|
|
|
// Run starts the lexer, and should be called in a goroutine.
|
|
func (l *Lexer) Run() {
|
|
for state := lexText; state != nil; {
|
|
state = state(l)
|
|
}
|
|
close(l.items)
|
|
}
|
|
|
|
// emit returns a token to the parser.
|
|
func (l *Lexer) emit(t int) {
|
|
l.items <- item{t, l.input[l.start:l.pos], l.line, l.column}
|
|
l.start = l.pos
|
|
}
|
|
|
|
// Lex gets the next token.
|
|
func (l *Lexer) Lex(st *yySymType) int {
|
|
s := <-l.items
|
|
l.lastItem = s
|
|
st.val = s.val
|
|
// fmt.Println("Lex returning", s)
|
|
return int(s.typ)
|
|
}
|
|
|
|
// Error is called by the parser when it finds a problem.
|
|
func (l *Lexer) Error(s string) {
|
|
fmt.Printf("parse error at %d:%d: %v\n", l.lastItem.line+1, l.lastItem.column+1, s)
|
|
fmt.Printf("error at %q\n", l.lastItem.val)
|
|
}
|
|
|
|
// errorf is used by the lexer to report errors. It inserts an ERROR token into
|
|
// the items channel, and sets the state to nil, which stops the lexer's state
|
|
// machine.
|
|
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
|
|
l.items <- item{ERROR, fmt.Sprintf(format, args), l.line, l.column}
|
|
return nil
|
|
}
|
|
|
|
// next returns the rune at the current location, and advances to the next rune
|
|
// in the input.
|
|
func (l *Lexer) next() (r rune) {
|
|
if l.pos >= len(l.input) {
|
|
l.width = 0
|
|
return eof
|
|
}
|
|
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
|
|
l.pos += l.width
|
|
l.column++
|
|
if r == '\n' {
|
|
l.line++
|
|
l.column = 0
|
|
}
|
|
return r
|
|
}
|
|
|
|
// ignore discards the current text from start to pos.
|
|
func (l *Lexer) ignore() {
|
|
l.start = l.pos
|
|
}
|
|
|
|
// backup moves back one character, but can only be called once per next() call.
|
|
func (l *Lexer) backup() {
|
|
l.pos -= l.width
|
|
if l.column > 0 {
|
|
l.column--
|
|
} else {
|
|
l.line--
|
|
}
|
|
l.width = 0
|
|
}
|
|
|
|
// peek looks ahead at the next rune in the stream without consuming it.
|
|
func (l *Lexer) peek() rune {
|
|
r := l.next()
|
|
l.backup()
|
|
return r
|
|
}
|
|
|
|
// accept will advance to the next rune if it's contained in the string of valid
|
|
// runes passed in by the caller.
|
|
func (l *Lexer) accept(valid string) bool {
|
|
if strings.IndexRune(valid, l.next()) >= 0 {
|
|
return true
|
|
}
|
|
l.backup()
|
|
return false
|
|
}
|
|
|
|
// acceptRun advances over a number of valid runes, stopping as soon as it hits
|
|
// one not on the list.
|
|
func (l *Lexer) acceptRun(valid string) {
|
|
for strings.IndexRune(valid, l.next()) >= 0 {
|
|
}
|
|
l.backup()
|
|
}
|
|
|
|
// keyword checks whether the current lexeme is a keyword or not. If so it
|
|
// returns the keyword's token id, otherwise it returns IDENTIFIER.
|
|
func (l *Lexer) keyword() int {
|
|
ident := l.input[l.start:l.pos]
|
|
tok, ok := keywords[ident]
|
|
if ok == true {
|
|
return int(tok)
|
|
}
|
|
return IDENTIFIER
|
|
}
|
|
|
|
// oneRuneToken determines whether a rune is a token. If so it returns the token
|
|
// id and true, otherwise it returns false.
|
|
func (l *Lexer) oneRuneToken(r rune) (int, bool) {
|
|
if strings.IndexRune(oneRuneTokens, r) >= 0 {
|
|
return int(r), true
|
|
}
|
|
|
|
return 0, false
|
|
}
|
|
|
|
// State functions
|
|
type stateFn func(*Lexer) stateFn
|
|
|
|
// lexText is the master lex routine. The lexer is started in this state.
|
|
func lexText(l *Lexer) stateFn {
|
|
for {
|
|
if strings.HasPrefix(l.input[l.pos:], "/*") {
|
|
return lexBlockComment
|
|
}
|
|
r := l.next()
|
|
if r == eof {
|
|
break
|
|
}
|
|
if unicode.IsSpace(r) {
|
|
l.ignore()
|
|
return lexText
|
|
}
|
|
if l.column == 1 && r == '%' {
|
|
l.backup()
|
|
return lexDirective
|
|
}
|
|
if unicode.IsLetter(r) {
|
|
l.backup()
|
|
return lexIdent
|
|
}
|
|
if unicode.IsNumber(r) || r == '-' {
|
|
l.backup()
|
|
return lexNumber
|
|
}
|
|
if t, isToken := l.oneRuneToken(r); isToken == true {
|
|
l.emit(t)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// lexBlockComment is used when we find a comment marker '/*' in the input.
|
|
func lexBlockComment(l *Lexer) stateFn {
|
|
for {
|
|
if strings.HasPrefix(l.input[l.pos:], "*/") {
|
|
// Found the end. Advance past the '*/' and discard the comment body.
|
|
l.next()
|
|
l.next()
|
|
l.ignore()
|
|
return lexText
|
|
}
|
|
if l.next() == eof {
|
|
return l.errorf("unterminated block comment")
|
|
}
|
|
}
|
|
}
|
|
|
|
// lexIdent handles identifiers.
|
|
func lexIdent(l *Lexer) stateFn {
|
|
for {
|
|
r := l.next()
|
|
if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
|
|
continue
|
|
}
|
|
l.backup()
|
|
break
|
|
}
|
|
// We may have a keyword, so check for that before emitting.
|
|
l.emit(l.keyword())
|
|
|
|
return lexText
|
|
}
|
|
|
|
// lexNumber handles decimal and hexadecimal numbers. Decimal numbers may begin
|
|
// with a '-'; hex numbers begin with '0x' and do not accept leading '-'.
|
|
func lexNumber(l *Lexer) stateFn {
|
|
// Leading '-' is ok
|
|
digits := "0123456789"
|
|
neg := l.accept("-")
|
|
if !neg {
|
|
// allow '0x' for hex numbers, as long as there's not a leading '-'.
|
|
r := l.peek()
|
|
if r == '0' {
|
|
l.next()
|
|
if l.accept("x") {
|
|
digits = "0123456789ABCDEFabcdef"
|
|
}
|
|
}
|
|
}
|
|
// followed by any number of digits
|
|
l.acceptRun(digits)
|
|
r := l.peek()
|
|
if unicode.IsLetter(r) {
|
|
l.next()
|
|
return l.errorf("invalid number: %q", l.input[l.start:l.pos])
|
|
}
|
|
l.emit(CONSTANT)
|
|
return lexText
|
|
}
|
|
|
|
// lexDirective handles lines beginning with '%'. These are used to emit C code
|
|
// directly to the output file. For now we're ignoring them, but some of the
|
|
// constants in the protocol file do depend on values from #included header
|
|
// files, so that may need to change.
|
|
func lexDirective(l *Lexer) stateFn {
|
|
for {
|
|
r := l.next()
|
|
if r == '\n' {
|
|
l.ignore()
|
|
return lexText
|
|
}
|
|
if r == eof {
|
|
return l.errorf("unterminated directive")
|
|
}
|
|
}
|
|
}
|
|
|
|
//---------------------------------------------------------------------------
// Routines called by the parser's actions.
//---------------------------------------------------------------------------
|
|
|
|
// StartEnum is called when the parser has found a valid enum.
|
|
func StartEnum() {
|
|
// Set the automatic value var to -1; it will be incremented before being
|
|
// assigned to an enum value.
|
|
CurrentEnumVal = -1
|
|
}
|
|
|
|
// AddEnum will add a new enum value to the list.
|
|
func AddEnum(name, val string) error {
|
|
ev, err := parseNumber(val)
|
|
if err != nil {
|
|
return fmt.Errorf("invalid enum value %v = %v", name, val)
|
|
}
|
|
return addEnum(name, ev)
|
|
}
|
|
|
|
// AddEnumAutoVal adds an enum to the list, using the automatically-incremented
|
|
// value. This is called when the parser finds an enum definition without an
|
|
// explicit value.
|
|
func AddEnumAutoVal(name string) error {
|
|
CurrentEnumVal++
|
|
return addEnum(name, CurrentEnumVal)
|
|
}
|
|
|
|
func addEnum(name string, val int64) error {
|
|
Gen.Enums = append(Gen.Enums, ConstItem{name, fmt.Sprintf("%d", val)})
|
|
CurrentEnumVal = val
|
|
return nil
|
|
}
|
|
|
|
// AddConst adds a new constant to the parser's list.
|
|
func AddConst(name, val string) error {
|
|
_, err := parseNumber(val)
|
|
if err != nil {
|
|
return fmt.Errorf("invalid const value %v = %v", name, val)
|
|
}
|
|
Gen.Consts = append(Gen.Consts, ConstItem{name, val})
|
|
return nil
|
|
}
|
|
|
|
// parseNumber converts the text of a numerical value to a 64-bit signed
// integer. Text starting with "0x" is read as hexadecimal, everything else as
// decimal. The error from strconv.ParseInt is returned unchanged.
func parseNumber(val string) (int64, error) {
	if strings.HasPrefix(val, "0x") {
		return strconv.ParseInt(val[2:], 16, 64)
	}
	return strconv.ParseInt(val, 10, 64)
}
|