go-libvirt-plain/internal/lvgen/generate.go
Geoff Hickey 5a6654f07a Generate libvirt constants from libvirt sources.
- Add a yacc-based parser and a hand-written lexer to read the
remote_protocol.x file from libvirt's sources.
- Use the new parser to generate the constants used to communicate with
libvirt.
2017-11-02 19:42:44 -04:00

565 lines
14 KiB
Go

// Copyright 2017 The go-libvirt Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package lvgen
// The libvirt API is divided into several categories. (Gallia est omnis divisa
// in partes tres.) The generator will output code for each category in a
// package underneath the go-libvirt directory.
import (
"fmt"
"io"
"io/ioutil"
"os"
"strconv"
"strings"
"text/template"
"unicode"
"unicode/utf8"
)
var keywords = map[string]int{
"hyper": HYPER,
"int": INT,
"short": SHORT,
"char": CHAR,
"bool": BOOL,
"case": CASE,
"const": CONST,
"default": DEFAULT,
"double": DOUBLE,
"enum": ENUM,
"float": FLOAT,
"opaque": OPAQUE,
"string": STRING,
"struct": STRUCT,
"switch": SWITCH,
"typedef": TYPEDEF,
"union": UNION,
"unsigned": UNSIGNED,
"void": VOID,
"program": PROGRAM,
"version": VERSION,
}
// ConstItem stores an const's symbol and value from the parser. This struct is
// also used for enums.
type ConstItem struct {
Name string
Val string
}
type Generator struct {
// Enums holds the list of enums found by the parser.
Enums []ConstItem
// Consts holds all the const items found by the parser.
Consts []ConstItem
}
// Gen accumulates items as the parser runs, and is then used to produce the
// output.
var Gen Generator
// CurrentEnumVal is the auto-incrementing value assigned to enums that aren't
// explicitly given a value.
var CurrentEnumVal int64
// oneRuneTokens lists the runes the lexer will consider to be tokens when it
// finds them. These are returned to the parser using the integer value of their
// runes.
var oneRuneTokens = `{}[]<>(),=;:*`
// Generate will output go bindings for libvirt. The lvPath parameter should be
// the path to the root of the libvirt source directory to use for the
// generation.
func Generate(proto io.Reader) error {
lexer, err := NewLexer(proto)
if err != nil {
return err
}
go lexer.Run()
parser := yyNewParser()
yyErrorVerbose = true
// Turn this on if you're debugging.
// yyDebug = 3
rv := parser.Parse(lexer)
if rv != 0 {
return fmt.Errorf("failed to parse libvirt protocol: %v", rv)
}
// Generate and write the output.
wr, err := os.Create("../constants/constants.gen.go")
if err != nil {
return err
}
defer wr.Close()
err = genGo(wr)
return err
}
func genGo(wr io.Writer) error {
// TODO: Move this someplace nice.
const consttempl = `/*
* This file generated by internal/lvgen/generate.go. DO NOT EDIT BY HAND!
*
* To regenerate, run 'go generate' in internal/lvgen.
*/
package constants
// libvirt procedure identifiers and other enums
//
// These are libvirt procedure numbers which correspond to each respective
// API call between remote_internal driver and libvirtd. Each procedure is
// identified by a unique number which *may change in any future libvirt
// update*.
//
// Examples:
// REMOTE_PROC_CONNECT_OPEN = 1
// REMOTE_PROC_DOMAIN_DEFINE_XML = 11
// REMOTE_PROC_DOMAIN_MIGRATE_SET_MAX_SPEED = 207,
const (
// From enums:
{{range .Enums}}{{.Name}} = {{.Val}}
{{end}}
// From consts:
{{range .Consts}}{{.Name}} = {{.Val}}
{{end}}
)
`
// Enums and consts from the protocol definition both become go consts in
// the generated code. We'll remove "REMOTE_" and then camel-case the
// name before making each one a go constant.
for ix, en := range Gen.Enums {
Gen.Enums[ix].Name = constNameTransform(en.Name)
}
for ix, en := range Gen.Consts {
Gen.Consts[ix].Name = constNameTransform(en.Name)
}
t := template.Must(template.New("consts").Parse(consttempl))
if err := t.Execute(wr, Gen); err != nil {
return err
}
return nil
}
// constNameTransform changes an upcased, snake-style name like
// REMOTE_PROTOCOL_VERSION to a comfortable Go name like ProtocolVersion. It
// also tries to upcase abbreviations so a name like DOMAIN_GET_XML becomes
// DomainGetXML, not DomainGetXml.
func constNameTransform(name string) string {
nn := fromSnakeToCamel(strings.TrimPrefix(name, "REMOTE_"))
nn = fixAbbrevs(nn)
return nn
}
// fromSnakeToCamel transmutes a snake-cased string to a camel-cased one. All
// runes that follow an underscore are up-cased, and the underscores themselves
// are omitted.
//
// ex: "PROC_DOMAIN_GET_METADATA" -> "ProcDomainGetMetadata"
func fromSnakeToCamel(s string) string {
buf := make([]rune, 0, len(s))
// Start with an upper-cased rune
hump := true
for _, r := range s {
if r == '_' {
hump = true
} else {
var transform func(rune) rune
if hump == true {
transform = unicode.ToUpper
} else {
transform = unicode.ToLower
}
buf = append(buf, transform(r))
hump = false
}
}
return string(buf)
}
// abbrevs is a list of abbreviations which should be all upper-case in a name.
// (This is really just to keep the go linters happy and to produce names that
// are intuitive to a go developer.)
var abbrevs = []string{"Xml", "Io", "Uuid", "Cpu", "Id", "Ip"}
// fixAbbrevs up-cases all instances of anything in the 'abbrevs' array. This
// would be a simple matter, but we don't want to upcase an abbreviation if it's
// actually part of a larger word, so it's not so simple.
func fixAbbrevs(s string) string {
for _, a := range abbrevs {
for loc := 0; ; {
loc = strings.Index(s[loc:], a)
if loc == -1 {
break
}
r := 'A'
if len(a) < len(s[loc:]) {
r, _ = utf8.DecodeRune([]byte(s[loc+len(a):]))
}
if unicode.IsLower(r) == false {
s = s[:loc] + strings.Replace(s[loc:], a, strings.ToUpper(a), 1)
}
loc++
}
}
return s
}
// TODO: Move this lexer to its own file?
// eof is returned by the lexer when there's no more input.
const eof = -1
type item struct {
typ int
val string
line, column int
}
// String will display lexer items for humans to debug. There are some
// calculations here due to the way goyacc arranges token values; see the
// generated file y.go for an idea what's going on here, but the basic idea is
// that the lower token type values are reserved for single-rune tokens, which
// the lexer reports using the value of the rune itself. Everything else is
// allocated a range of type value up above all the possible single-rune values.
func (i item) String() string {
tokType := i.typ
if tokType >= yyPrivate {
if tokType < yyPrivate+len(yyTok2) {
tokType = yyTok2[tokType-yyPrivate]
}
}
rv := fmt.Sprintf("%s %q %d:%d", yyTokname(tokType), i.val, i.line, i.column)
return rv
}
// Lexer stores the state of this lexer.
type Lexer struct {
input string // the string we're scanning.
start int // start position of the item.
pos int // current position in the input.
line int // the current line (for error reporting).
column int // current position within the current line.
width int // width of the last rune scanned.
items chan item // channel of scanned lexer items (lexemes).
lastItem item // The last item the lexer handed the parser
}
// NewLexer will return a new lexer for the passed-in reader.
func NewLexer(rdr io.Reader) (*Lexer, error) {
l := &Lexer{}
b, err := ioutil.ReadAll(rdr)
if err != nil {
return nil, err
}
l.input = string(b)
l.items = make(chan item)
return l, nil
}
// Run starts the lexer, and should be called in a goroutine.
func (l *Lexer) Run() {
for state := lexText; state != nil; {
state = state(l)
}
close(l.items)
}
// emit returns a token to the parser.
func (l *Lexer) emit(t int) {
l.items <- item{t, l.input[l.start:l.pos], l.line, l.column}
l.start = l.pos
}
// Lex gets the next token.
func (l *Lexer) Lex(st *yySymType) int {
s := <-l.items
l.lastItem = s
st.val = s.val
// fmt.Println("Lex returning", s)
return int(s.typ)
}
// Error is called by the parser when it finds a problem.
func (l *Lexer) Error(s string) {
fmt.Printf("parse error at %d:%d: %v\n", l.lastItem.line+1, l.lastItem.column+1, s)
fmt.Printf("error at %q\n", l.lastItem.val)
}
// errorf is used by the lexer to report errors. It inserts an ERROR token into
// the items channel, and sets the state to nil, which stops the lexer's state
// machine.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
l.items <- item{ERROR, fmt.Sprintf(format, args), l.line, l.column}
return nil
}
// next returns the rune at the current location, and advances to the next rune
// in the input.
func (l *Lexer) next() (r rune) {
if l.pos >= len(l.input) {
l.width = 0
return eof
}
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
l.pos += l.width
l.column++
if r == '\n' {
l.line++
l.column = 0
}
return r
}
// ignore discards the current text from start to pos.
func (l *Lexer) ignore() {
l.start = l.pos
}
// backup moves back one character, but can only be called once per next() call.
func (l *Lexer) backup() {
l.pos -= l.width
if l.column > 0 {
l.column--
} else {
l.line--
}
l.width = 0
}
// peek looks ahead at the next rune in the stream without consuming it.
func (l *Lexer) peek() rune {
r := l.next()
l.backup()
return r
}
// accept will advance to the next rune if it's contained in the string of valid
// runes passed in by the caller.
func (l *Lexer) accept(valid string) bool {
if strings.IndexRune(valid, l.next()) >= 0 {
return true
}
l.backup()
return false
}
// acceptRun advances over a number of valid runes, stopping as soon as it hits
// one not on the list.
func (l *Lexer) acceptRun(valid string) {
for strings.IndexRune(valid, l.next()) >= 0 {
}
l.backup()
}
// keyword checks whether the current lexeme is a keyword or not. If so it
// returns the keyword's token id, otherwise it returns IDENTIFIER.
func (l *Lexer) keyword() int {
ident := l.input[l.start:l.pos]
tok, ok := keywords[ident]
if ok == true {
return int(tok)
}
return IDENTIFIER
}
// oneRuneToken determines whether a rune is a token. If so it returns the token
// id and true, otherwise it returns false.
func (l *Lexer) oneRuneToken(r rune) (int, bool) {
if strings.IndexRune(oneRuneTokens, r) >= 0 {
return int(r), true
}
return 0, false
}
// State functions
type stateFn func(*Lexer) stateFn
// lexText is the master lex routine. The lexer is started in this state.
func lexText(l *Lexer) stateFn {
for {
if strings.HasPrefix(l.input[l.pos:], "/*") {
return lexBlockComment
}
r := l.next()
if r == eof {
break
}
if unicode.IsSpace(r) {
l.ignore()
return lexText
}
if l.column == 1 && r == '%' {
l.backup()
return lexDirective
}
if unicode.IsLetter(r) {
l.backup()
return lexIdent
}
if unicode.IsNumber(r) || r == '-' {
l.backup()
return lexNumber
}
if t, isToken := l.oneRuneToken(r); isToken == true {
l.emit(t)
}
}
return nil
}
// lexBlockComment is used when we find a comment marker '/*' in the input.
func lexBlockComment(l *Lexer) stateFn {
for {
if strings.HasPrefix(l.input[l.pos:], "*/") {
// Found the end. Advance past the '*/' and discard the comment body.
l.next()
l.next()
l.ignore()
return lexText
}
if l.next() == eof {
return l.errorf("unterminated block comment")
}
}
}
// lexIdent handles identifiers.
func lexIdent(l *Lexer) stateFn {
for {
r := l.next()
if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
continue
}
l.backup()
break
}
// We may have a keyword, so check for that before emitting.
l.emit(l.keyword())
return lexText
}
// lexNumber handles decimal and hexadecimal numbers. Decimal numbers may begin
// with a '-'; hex numbers begin with '0x' and do not accept leading '-'.
func lexNumber(l *Lexer) stateFn {
// Leading '-' is ok
digits := "0123456789"
neg := l.accept("-")
if !neg {
// allow '0x' for hex numbers, as long as there's not a leading '-'.
r := l.peek()
if r == '0' {
l.next()
if l.accept("x") {
digits = "0123456789ABCDEFabcdef"
}
}
}
// followed by any number of digits
l.acceptRun(digits)
r := l.peek()
if unicode.IsLetter(r) {
l.next()
return l.errorf("invalid number: %q", l.input[l.start:l.pos])
}
l.emit(CONSTANT)
return lexText
}
// lexDirective handles lines beginning with '%'. These are used to emit C code
// directly to the output file. For now we're ignoring them, but some of the
// constants in the protocol file do depend on values from #included header
// files, so that may need to change.
func lexDirective(l *Lexer) stateFn {
for {
r := l.next()
if r == '\n' {
l.ignore()
return lexText
}
if r == eof {
return l.errorf("unterminated directive")
}
}
}
//---------------------------------------------------------------------------
// Routines called by the parser's actions.
//---------------------------------------------------------------------------
// StartEnum is called when the parser has found a valid enum.
func StartEnum() {
// Set the automatic value var to -1; it will be incremented before being
// assigned to an enum value.
CurrentEnumVal = -1
}
// AddEnum will add a new enum value to the list.
func AddEnum(name, val string) error {
ev, err := parseNumber(val)
if err != nil {
return fmt.Errorf("invalid enum value %v = %v", name, val)
}
return addEnum(name, ev)
}
// AddEnumAutoVal adds an enum to the list, using the automatically-incremented
// value. This is called when the parser finds an enum definition without an
// explicit value.
func AddEnumAutoVal(name string) error {
CurrentEnumVal++
return addEnum(name, CurrentEnumVal)
}
func addEnum(name string, val int64) error {
Gen.Enums = append(Gen.Enums, ConstItem{name, fmt.Sprintf("%d", val)})
CurrentEnumVal = val
return nil
}
// AddConst adds a new constant to the parser's list.
func AddConst(name, val string) error {
_, err := parseNumber(val)
if err != nil {
return fmt.Errorf("invalid const value %v = %v", name, val)
}
Gen.Consts = append(Gen.Consts, ConstItem{name, val})
return nil
}
// parseNumber makes sure that a parsed numerical value can be parsed to a 64-
// bit integer.
func parseNumber(val string) (int64, error) {
base := 10
if strings.HasPrefix(val, "0x") {
base = 16
val = val[2:]
}
n, err := strconv.ParseInt(val, base, 64)
return n, err
}