go-libvirt-plain/internal/lvgen/generate.go

// Copyright 2017 The go-libvirt Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package lvgen

// The libvirt API is divided into several categories. (Gallia est omnis divisa
// in partes tres.) The generator will output code for each category in a
// package underneath the go-libvirt directory.

import (
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"strconv"
	"strings"
	"text/template"
	"unicode"
	"unicode/utf8"
)

// keywords maps each reserved word of the protocol language to its token id.
// Lexer.keyword looks identifiers up here; anything not listed is reported to
// the parser as an IDENTIFIER. The token ids themselves (HYPER, INT, ...) are
// not defined in this file; presumably they come from the goyacc-generated
// parser.
var keywords = map[string]int{
	"hyper":    HYPER,
	"int":      INT,
	"short":    SHORT,
	"char":     CHAR,
	"bool":     BOOL,
	"case":     CASE,
	"const":    CONST,
	"default":  DEFAULT,
	"double":   DOUBLE,
	"enum":     ENUM,
	"float":    FLOAT,
	"opaque":   OPAQUE,
	"string":   STRING,
	"struct":   STRUCT,
	"switch":   SWITCH,
	"typedef":  TYPEDEF,
	"union":    UNION,
	"unsigned": UNSIGNED,
	"void":     VOID,
	"program":  PROGRAM,
	"version":  VERSION,
}

// ConstItem stores a const's symbol and value from the parser. This struct is
// also used for enums.
type ConstItem struct {
	// Name is the symbol as found in the protocol definition; genGo later
	// overwrites it with the Go-style name used in the generated code.
	Name string
	// Val is the value in textual form, exactly as it is written to the
	// generated code.
	Val  string
}

// Generator collects the items found by the parser. Its exported fields are
// the data the output template in genGo ranges over.
type Generator struct {
	// Enums holds the list of enums found by the parser.
	Enums []ConstItem
	// Consts holds all the const items found by the parser.
	Consts []ConstItem
}

// Gen accumulates items as the parser runs, and is then used to produce the
// output. It is package-level state: AddEnum, AddEnumAutoVal and AddConst
// append to it, and genGo rewrites the names in place before rendering.
var Gen Generator

// CurrentEnumVal is the auto-incrementing value assigned to enums that aren't
// explicitly given a value. StartEnum resets it to -1, AddEnumAutoVal
// increments it before use, and every added enum value (explicit or
// automatic) is recorded here so that numbering continues from it.
var CurrentEnumVal int64

// oneRuneTokens lists the runes the lexer will consider to be tokens when it
// finds them. These are returned to the parser using the integer value of their
// runes (see Lexer.oneRuneToken).
var oneRuneTokens = `{}[]<>(),=;:*`

// Generate will output go bindings for libvirt. The proto parameter is a
// reader for the libvirt protocol definition; it is read to the end before
// parsing starts.
//
// The generated code is written to ../constants/constants.gen.go, relative to
// the current working directory. An error is returned if the input cannot be
// read or parsed, or if the output file cannot be created, written or closed.
func Generate(proto io.Reader) error {
	lexer, err := NewLexer(proto)
	if err != nil {
		return err
	}
	// The lexer feeds tokens to the parser over a channel, so it runs in its
	// own goroutine while the parser consumes them here.
	go lexer.Run()
	parser := yyNewParser()
	yyErrorVerbose = true
	// Turn this on if you're debugging.
	// yyDebug = 3
	rv := parser.Parse(lexer)
	if rv != 0 {
		return fmt.Errorf("failed to parse libvirt protocol: %v", rv)
	}

	// Generate and write the output.
	wr, err := os.Create("../constants/constants.gen.go")
	if err != nil {
		return err
	}
	if err := genGo(wr); err != nil {
		// The generation error is the one worth reporting; a secondary
		// failure to close the partial file is deliberately ignored.
		_ = wr.Close()
		return err
	}
	// Close explicitly rather than via defer so that a failure to finish
	// writing the file is reported to the caller instead of being dropped.
	return wr.Close()
}

// genGo renders the collected enums and consts as a Go source file and writes
// it to wr. The names stored in Gen are rewritten in place to their Go-style
// form first, so Gen holds the transformed names once this returns. It returns
// any error reported while executing the template.
func genGo(wr io.Writer) error {
	// TODO: Move this someplace nice.
	const outputTmpl = `/*
 * This file generated by internal/lvgen/generate.go. DO NOT EDIT BY HAND!
 *
 * To regenerate, run 'go generate' in internal/lvgen.
 */
package constants

// libvirt procedure identifiers and other enums
//
// These are libvirt procedure numbers which correspond to each respective
// API call between remote_internal driver and libvirtd. Each procedure is
// identified by a unique number which *may change in any future libvirt
// update*.
//
// Examples:
//	REMOTE_PROC_CONNECT_OPEN = 1
//	REMOTE_PROC_DOMAIN_DEFINE_XML = 11
//	REMOTE_PROC_DOMAIN_MIGRATE_SET_MAX_SPEED = 207,
const (
	// From enums:
	{{range .Enums}}{{.Name}} = {{.Val}}
	{{end}}

	// From consts:
	{{range .Consts}}{{.Name}} = {{.Val}}
	{{end}}
)
`
	// Both enums and consts from the protocol definition end up as go consts
	// in the generated code. Each name has "REMOTE_" removed and is then
	// camel-cased before it is handed to the template.
	for i := range Gen.Enums {
		Gen.Enums[i].Name = constNameTransform(Gen.Enums[i].Name)
	}
	for i := range Gen.Consts {
		Gen.Consts[i].Name = constNameTransform(Gen.Consts[i].Name)
	}

	tmpl := template.Must(template.New("consts").Parse(outputTmpl))
	return tmpl.Execute(wr, Gen)
}

// constNameTransform changes an upcased, snake-style name like
// REMOTE_PROTOCOL_VERSION to a comfortable Go name like ProtocolVersion. It
// also tries to upcase abbreviations so a name like DOMAIN_GET_XML becomes
// DomainGetXML, not DomainGetXml.
func constNameTransform(name string) string {
	nn := fromSnakeToCamel(strings.TrimPrefix(name, "REMOTE_"))
	nn = fixAbbrevs(nn)
	return nn
}

// fromSnakeToCamel converts a snake-cased string to a camel-cased one. The
// first rune and every rune that follows an underscore are up-cased, all other
// runes are down-cased, and the underscores themselves are dropped.
//
// ex: "PROC_DOMAIN_GET_METADATA" -> "ProcDomainGetMetadata"
func fromSnakeToCamel(s string) string {
	out := make([]rune, 0, len(s))
	// upperNext is set at the start of the string and after each underscore.
	upperNext := true

	for _, c := range s {
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			out = append(out, unicode.ToUpper(c))
			upperNext = false
		default:
			out = append(out, unicode.ToLower(c))
		}
	}

	return string(out)
}

// abbrevs is a list of abbreviations which should be all upper-case in a name.
// (This is really just to keep the go linters happy and to produce names that
// are intuitive to a go developer.)
var abbrevs = []string{"Xml", "Io", "Uuid", "Cpu", "Id", "Ip"}

// fixAbbrevs up-cases all instances of anything in the 'abbrevs' list. An
// abbreviation is left alone when it is immediately followed by a lower-case
// rune, because it is then the start of a larger word ("Id" in "Identity").
// An abbreviation at the very end of s is always up-cased. The adjusted string
// is returned.
func fixAbbrevs(s string) string {
	for _, a := range abbrevs {
		upper := strings.ToUpper(a)
		for loc := 0; ; {
			// strings.Index reports an offset relative to the slice it was
			// given, so rebase it onto s. Treating it as absolute makes the
			// search jump backwards and can loop forever.
			idx := strings.Index(s[loc:], a)
			if idx == -1 {
				break
			}
			loc += idx
			end := loc + len(a)

			// 'A' stands in for "no following rune", which counts as a word
			// boundary.
			r := 'A'
			if end < len(s) {
				r, _ = utf8.DecodeRuneInString(s[end:])
			}
			if !unicode.IsLower(r) {
				s = s[:loc] + upper + s[end:]
				end = loc + len(upper)
			}
			// Resume the search just past this occurrence.
			loc = end
		}
	}
	return s
}

// TODO: Move this lexer to its own file?

// eof is returned by the lexer when there's no more input. Lexer.next yields
// it in place of a rune once the read position reaches the end of the input.
const eof = -1

// item is one lexeme passed from the lexer to the parser over the Lexer's
// items channel.
type item struct {
	// typ is the token id: a token constant such as IDENTIFIER or CONSTANT,
	// or the rune's own value for single-rune tokens.
	typ          int
	// val is the text of the lexeme (or the message, for ERROR items).
	val          string
	// line and column are the lexer's zero-based position at the time the
	// item was created.
	line, column int
}

// String renders a lexer item in a form meant for humans who are debugging.
// Some arithmetic is needed because of the way goyacc arranges token values;
// the generated file y.go shows the details. In short, the low token type
// values are reserved for single-rune tokens, which the lexer reports using
// the value of the rune itself, while every other token gets a type value from
// a range above all possible single-rune values. Types in that upper range are
// mapped through yyTok2 before the token name is looked up.
func (i item) String() string {
	tok := i.typ
	if tok >= yyPrivate && tok < yyPrivate+len(yyTok2) {
		tok = yyTok2[tok-yyPrivate]
	}
	return fmt.Sprintf("%s %q %d:%d", yyTokname(tok), i.val, i.line, i.column)
}

// Lexer stores the state of this lexer. The whole input is held in memory as a
// string; the state functions scan it and hand lexemes to the parser through
// the items channel.
type Lexer struct {
	input    string    // the string we're scanning.
	start    int       // start position (byte offset) of the item being scanned.
	pos      int       // current position (byte offset) in the input.
	line     int       // the current line, zero-based (for error reporting).
	column   int       // number of runes consumed so far on the current line.
	width    int       // width in bytes of the last rune scanned; 0 if there is none to back up over.
	items    chan item // channel of scanned lexer items (lexemes).
	lastItem item      // The last item the lexer handed the parser
}

// NewLexer builds a lexer for the passed-in reader. The reader is consumed to
// the end right away; a read failure is returned as the error, together with a
// nil lexer. The returned lexer does nothing until Run is started.
func NewLexer(rdr io.Reader) (*Lexer, error) {
	src, err := ioutil.ReadAll(rdr)
	if err != nil {
		return nil, err
	}

	return &Lexer{
		input: string(src),
		items: make(chan item),
	}, nil
}

// Run drives the lexer's state machine, starting in lexText, until a state
// function returns nil, and then closes the items channel. Sends on that
// channel are unbuffered, so Run should be called in its own goroutine while
// the parser receives via Lex.
func (l *Lexer) Run() {
	var state stateFn = lexText
	for state != nil {
		state = state(l)
	}
	close(l.items)
}

// emit sends the pending lexeme (the input between start and pos) to the
// parser as a token of type t, tagged with the lexer's current line and
// column, and then starts a new lexeme at the current position.
func (l *Lexer) emit(t int) {
	lexeme := l.input[l.start:l.pos]
	l.items <- item{typ: t, val: lexeme, line: l.line, column: l.column}
	l.start = l.pos
}

// Lex receives the next item from the lexer goroutine, stores its text in st
// and returns its token type. The item is also remembered so that Error can
// report where parsing stopped. Once the items channel has been closed the
// zero item is received, so 0 is returned.
func (l *Lexer) Lex(st *yySymType) int {
	tok := <-l.items
	st.val = tok.val
	l.lastItem = tok
	// fmt.Println("Lex returning", tok)
	return tok.typ
}

// Error is called by the parser when it finds a problem. It prints the
// message to standard output along with the position (converted to one-based
// line and column) and text of the last item handed to the parser.
func (l *Lexer) Error(s string) {
	last := l.lastItem
	fmt.Printf("parse error at %d:%d: %v\n", last.line+1, last.column+1, s)
	fmt.Printf("error at %q\n", last.val)
}

// errorf is used by the lexer to report errors. It formats the message from
// format and args (as fmt.Sprintf does), inserts it into the items channel as
// an ERROR token, and returns nil as the next state, which stops the lexer's
// state machine.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
	// args must be expanded with "..."; passing the slice itself would hand
	// Sprintf a single []interface{} operand and garble the message.
	l.items <- item{ERROR, fmt.Sprintf(format, args...), l.line, l.column}
	return nil
}

// next decodes and returns the rune at the current location and advances past
// it, keeping the line and column counters up to date. At the end of the input
// it returns eof and sets the width to zero, without moving.
func (l *Lexer) next() (r rune) {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}

	var size int
	r, size = utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = size
	l.pos += size

	if r == '\n' {
		// A newline starts a fresh line at column zero.
		l.line++
		l.column = 0
	} else {
		l.column++
	}
	return r
}

// ignore discards the current text from start to pos, so that the next
// emitted lexeme begins at the current position.
func (l *Lexer) ignore() {
	l.start = l.pos
}

// backup moves back one character, undoing the most recent next() call. It can
// only take effect once per next() call: when there is nothing to undo
// (next() returned eof, or backup has already been called) it does nothing, so
// the line and column counters are never moved without the position moving.
func (l *Lexer) backup() {
	if l.width == 0 {
		return
	}
	l.pos -= l.width
	l.width = 0

	if l.column > 0 {
		l.column--
		return
	}

	// Column zero right after a next() means the rune being un-read was a
	// newline. Step back onto the previous line and recompute the column as
	// the number of runes between the start of that line and the position.
	l.line--
	lineStart := strings.LastIndexByte(l.input[:l.pos], '\n') + 1
	l.column = utf8.RuneCountInString(l.input[lineStart:l.pos])
}

// peek reports the next rune in the stream (or eof) without consuming it: the
// rune is read with next() and then handed back with backup().
func (l *Lexer) peek() rune {
	defer l.backup()
	return l.next()
}

// accept consumes the next rune if it is one of the runes in valid and reports
// whether it did so. A rune that is not in valid is left unconsumed.
func (l *Lexer) accept(valid string) bool {
	if !strings.ContainsRune(valid, l.next()) {
		l.backup()
		return false
	}
	return true
}

// acceptRun consumes runes for as long as they are contained in valid. The
// first rune that is not in valid ends the run and is left unconsumed.
func (l *Lexer) acceptRun(valid string) {
	for {
		if r := l.next(); !strings.ContainsRune(valid, r) {
			l.backup()
			return
		}
	}
}

// keyword classifies the current lexeme (the input between start and pos). If
// it is listed in keywords the keyword's token id is returned, otherwise
// IDENTIFIER is returned.
func (l *Lexer) keyword() int {
	if tok, found := keywords[l.input[l.start:l.pos]]; found {
		return tok
	}
	return IDENTIFIER
}

// oneRuneToken reports whether r is one of the single-rune tokens listed in
// oneRuneTokens. If it is, the token id (the integer value of the rune) and
// true are returned; otherwise 0 and false.
func (l *Lexer) oneRuneToken(r rune) (int, bool) {
	if !strings.ContainsRune(oneRuneTokens, r) {
		return 0, false
	}
	return int(r), true
}

// State functions

// stateFn is one state of the lexer's state machine. A state function scans
// some of the Lexer's input and returns the state to run next; returning nil
// ends the loop in Lexer.Run.
type stateFn func(*Lexer) stateFn

// lexText is the master lex routine, and the state the lexer starts in. It
// looks at the upcoming input and hands over to the specialised state for
// comments, directives, identifiers and numbers; white space is discarded and
// single-rune tokens are emitted directly. Any other rune is stepped over
// without being reported (it stays part of the pending lexeme). At the end of
// the input it returns nil, which stops the lexer.
func lexText(l *Lexer) stateFn {
	for {
		if strings.HasPrefix(l.input[l.pos:], "/*") {
			return lexBlockComment
		}

		r := l.next()
		switch {
		case r == eof:
			return nil
		case unicode.IsSpace(r):
			l.ignore()
			return lexText
		case l.column == 1 && r == '%':
			// A '%' only introduces a directive as the first rune of a line.
			l.backup()
			return lexDirective
		case unicode.IsLetter(r):
			l.backup()
			return lexIdent
		case unicode.IsNumber(r) || r == '-':
			l.backup()
			return lexNumber
		}

		if tok, ok := l.oneRuneToken(r); ok {
			l.emit(tok)
		}
	}
}

// lexBlockComment is used when we find a comment marker '/*' in the input. It
// consumes everything up to and including the closing '*/', discards it, and
// returns to lexText. If the input ends first, an ERROR token is emitted and
// the lexer stops.
func lexBlockComment(l *Lexer) stateFn {
	// Step over the opening marker before looking for the terminator.
	// Otherwise the '*' of "/*" could pair with a following '/', and "/*/"
	// would be taken for a complete comment.
	if strings.HasPrefix(l.input[l.pos:], "/*") {
		l.next()
		l.next()
	}
	for {
		if strings.HasPrefix(l.input[l.pos:], "*/") {
			// Found the end. Advance past the '*/' and discard the comment body.
			l.next()
			l.next()
			l.ignore()
			return lexText
		}
		if l.next() == eof {
			return l.errorf("unterminated block comment")
		}
	}
}

// lexIdent scans an identifier: a run of letters, digits and underscores. The
// lexeme is emitted as a keyword token if it is listed in keywords and as an
// IDENTIFIER otherwise, after which lexing continues in lexText.
func lexIdent(l *Lexer) stateFn {
	for {
		r := l.next()
		if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' {
			break
		}
	}
	// The rune that ended the identifier isn't part of it.
	l.backup()

	l.emit(l.keyword())
	return lexText
}

// lexNumber scans decimal and hexadecimal numbers and emits them as CONSTANT
// tokens. A decimal number may start with a '-'. A hex number starts with '0x'
// and can't have a leading '-'. A letter directly after the digits makes the
// number invalid: an ERROR token is emitted and the lexer stops.
func lexNumber(l *Lexer) stateFn {
	const (
		decDigits = "0123456789"
		hexDigits = "0123456789ABCDEFabcdef"
	)

	allowed := decDigits
	// A leading '-' is fine, but it rules out the '0x' hex prefix.
	if !l.accept("-") && l.peek() == '0' {
		l.next()
		if l.accept("x") {
			allowed = hexDigits
		}
	}

	// Then come any number of digits.
	l.acceptRun(allowed)
	if unicode.IsLetter(l.peek()) {
		// Include the offending letter in the reported text.
		l.next()
		return l.errorf("invalid number: %q", l.input[l.start:l.pos])
	}

	l.emit(CONSTANT)
	return lexText
}

// lexDirective handles lines beginning with '%'. These are used to emit C code
// directly to the output file. For now the whole line, up to and including the
// newline, is discarded. Some of the constants in the protocol file do depend
// on values from #included header files, so that may need to change. A
// directive that runs into the end of the input without a newline is reported
// as an error.
func lexDirective(l *Lexer) stateFn {
	for {
		switch l.next() {
		case '\n':
			l.ignore()
			return lexText
		case eof:
			return l.errorf("unterminated directive")
		}
	}
}

//---------------------------------------------------------------------------
// Routines called by the parser's actions.
//---------------------------------------------------------------------------

// StartEnum is called when the parser has found a valid enum. It resets the
// automatic numbering used for enum values that have no explicit value.
func StartEnum() {
	// Set the automatic value var to -1; it will be incremented before being
	// assigned to an enum value, so the first automatic value is 0.
	CurrentEnumVal = -1
}

// AddEnum adds a new enum value with an explicit value to the list. val is the
// value as written in the protocol definition (decimal, or hex with a "0x"
// prefix); an error is returned if it can't be parsed as a 64-bit integer.
func AddEnum(name, val string) error {
	parsed, perr := parseNumber(val)
	if perr != nil {
		return fmt.Errorf("invalid enum value %v = %v", name, val)
	}
	return addEnum(name, parsed)
}

// AddEnumAutoVal adds an enum to the list, using the automatically-incremented
// value. This is called when the parser finds an enum definition without an
// explicit value. The value used is one more than the previously added enum
// value (or 0 directly after StartEnum).
func AddEnumAutoVal(name string) error {
	CurrentEnumVal++
	return addEnum(name, CurrentEnumVal)
}

func addEnum(name string, val int64) error {
	Gen.Enums = append(Gen.Enums, ConstItem{name, fmt.Sprintf("%d", val)})
	CurrentEnumVal = val
	return nil
}

// AddConst adds a new constant to the parser's list. The value is checked to
// be a number that fits in 64 bits, but it is stored in its original textual
// form. An error is returned if the check fails.
func AddConst(name, val string) error {
	if _, perr := parseNumber(val); perr != nil {
		return fmt.Errorf("invalid const value %v = %v", name, val)
	}
	Gen.Consts = append(Gen.Consts, ConstItem{Name: name, Val: val})
	return nil
}

// parseNumber converts the textual form of a number to a signed 64-bit
// integer. A value with a "0x" prefix is read as hexadecimal, anything else as
// decimal. The error from strconv.ParseInt is returned when the text isn't a
// valid number or doesn't fit.
func parseNumber(val string) (int64, error) {
	if hex := strings.TrimPrefix(val, "0x"); hex != val {
		return strconv.ParseInt(hex, 16, 64)
	}
	return strconv.ParseInt(val, 10, 64)
}