// Copyright 2017 The go-libvirt Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package lvgen

// The libvirt API is divided into several categories. (Gallia est omnis divisa
// in partes tres.) The generator will output code for each category in a
// package underneath the go-libvirt directory.

import (
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"strconv"
	"strings"
	"text/template"
	"unicode"
	"unicode/utf8"
)

var keywords = map[string]int{
	"hyper":    HYPER,
	"int":      INT,
	"short":    SHORT,
	"char":     CHAR,
	"bool":     BOOL,
	"case":     CASE,
	"const":    CONST,
	"default":  DEFAULT,
	"double":   DOUBLE,
	"enum":     ENUM,
	"float":    FLOAT,
	"opaque":   OPAQUE,
	"string":   STRING,
	"struct":   STRUCT,
	"switch":   SWITCH,
	"typedef":  TYPEDEF,
	"union":    UNION,
	"unsigned": UNSIGNED,
	"void":     VOID,
	"program":  PROGRAM,
	"version":  VERSION,
}

// ConstItem stores a const's symbol and value from the parser. This struct is
// also used for enums.
type ConstItem struct {
	Name string
	Val  string
}

// Generator holds all the items the parser has collected; it is the data the
// output templates are executed against.
type Generator struct {
	// Enums holds the list of enums found by the parser.
	Enums []ConstItem
	// Consts holds all the const items found by the parser.
	Consts []ConstItem
}

// Gen accumulates items as the parser runs, and is then used to produce the
// output.
var Gen Generator

// CurrentEnumVal is the auto-incrementing value assigned to enums that aren't
// explicitly given a value.
var CurrentEnumVal int64

// oneRuneTokens lists the runes the lexer will consider to be tokens when it
// finds them. These are returned to the parser using the integer value of
// their runes.
var oneRuneTokens = `{}[]<>(),=;:*`

// Generate outputs go bindings for libvirt. The proto parameter should be a
// reader for the libvirt protocol definition to generate from.
func Generate(proto io.Reader) error {
	lexer, err := NewLexer(proto)
	if err != nil {
		return err
	}
	go lexer.Run()

	parser := yyNewParser()
	yyErrorVerbose = true
	// Turn this on if you're debugging.
	// yyDebug = 3
	rv := parser.Parse(lexer)
	if rv != 0 {
		return fmt.Errorf("failed to parse libvirt protocol: %v", rv)
	}

	// Generate and write the output.
	wr, err := os.Create("../constants/constants.gen.go")
	if err != nil {
		return err
	}
	defer wr.Close()

	return genGo(wr)
}
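// Illustrative usage sketch (not part of this package): Generate is normally
// driven by a small 'go generate' helper. The input file name below is an
// assumption; substitute the actual libvirt protocol definition file.
//
//	rdr, err := os.Open("remote_protocol.x") // hypothetical input file
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer rdr.Close()
//	if err := lvgen.Generate(rdr); err != nil {
//		log.Fatal(err)
//	}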
func genGo(wr io.Writer) error {
	// TODO: Move this someplace nice.
	const consttempl = `/*
 * This file was generated by internal/lvgen/generate.go. DO NOT EDIT BY HAND!
 *
 * To regenerate, run 'go generate' in internal/lvgen.
 */
package constants

// libvirt procedure identifiers and other enums
//
// These are libvirt procedure numbers which correspond to each respective
// API call between remote_internal driver and libvirtd. Each procedure is
// identified by a unique number which *may change in any future libvirt
// update*.
//
// Examples:
//   REMOTE_PROC_CONNECT_OPEN = 1
//   REMOTE_PROC_DOMAIN_DEFINE_XML = 11
//   REMOTE_PROC_DOMAIN_MIGRATE_SET_MAX_SPEED = 207

const (
	// From enums:
{{range .Enums}}	{{.Name}} = {{.Val}}
{{end}}
	// From consts:
{{range .Consts}}	{{.Name}} = {{.Val}}
{{end}})
`
	// Enums and consts from the protocol definition both become go consts in
	// the generated code. We'll remove the "REMOTE_" prefix and camel-case
	// each name before making it a go constant.
	for ix, en := range Gen.Enums {
		Gen.Enums[ix].Name = constNameTransform(en.Name)
	}
	for ix, en := range Gen.Consts {
		Gen.Consts[ix].Name = constNameTransform(en.Name)
	}

	t := template.Must(template.New("consts").Parse(consttempl))
	if err := t.Execute(wr, Gen); err != nil {
		return err
	}
	return nil
}

// constNameTransform changes an upcased, snake-style name like
// REMOTE_PROTOCOL_VERSION to a comfortable Go name like ProtocolVersion. It
// also tries to upcase abbreviations so a name like DOMAIN_GET_XML becomes
// DomainGetXML, not DomainGetXml.
func constNameTransform(name string) string {
	nn := fromSnakeToCamel(strings.TrimPrefix(name, "REMOTE_"))
	nn = fixAbbrevs(nn)
	return nn
}

// fromSnakeToCamel transmutes a snake-cased string to a camel-cased one. All
// runes that follow an underscore are up-cased, and the underscores themselves
// are omitted.
//
// ex: "PROC_DOMAIN_GET_METADATA" -> "ProcDomainGetMetadata"
func fromSnakeToCamel(s string) string {
	buf := make([]rune, 0, len(s))
	// Start with an upper-cased rune.
	hump := true

	for _, r := range s {
		if r == '_' {
			hump = true
		} else {
			var transform func(rune) rune
			if hump {
				transform = unicode.ToUpper
			} else {
				transform = unicode.ToLower
			}
			buf = append(buf, transform(r))
			hump = false
		}
	}

	return string(buf)
}

// abbrevs is a list of abbreviations which should be all upper-case in a name.
// (This is really just to keep the go linters happy and to produce names that
// are intuitive to a go developer.)
var abbrevs = []string{"Xml", "Io", "Uuid", "Cpu", "Id", "Ip"}

// fixAbbrevs up-cases all instances of anything in the 'abbrevs' array. This
// would be a simple matter, but we don't want to upcase an abbreviation if
// it's actually part of a larger word, so it's not so simple.
func fixAbbrevs(s string) string {
	for _, a := range abbrevs {
		for loc := 0; ; {
			// Track loc as an absolute index into s; the index returned by
			// strings.Index is relative to the slice we searched.
			idx := strings.Index(s[loc:], a)
			if idx == -1 {
				break
			}
			loc += idx
			// Only upcase the abbreviation if the rune following it isn't
			// lower-case, which would mean it's part of a larger word.
			r := 'A'
			if len(a) < len(s[loc:]) {
				r, _ = utf8.DecodeRune([]byte(s[loc+len(a):]))
			}
			if !unicode.IsLower(r) {
				s = s[:loc] + strings.Replace(s[loc:], a, strings.ToUpper(a), 1)
			}
			loc++
		}
	}
	return s
}
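// Illustrative walkthrough of the name pipeline above (the procedure name is
// assumed to appear in the protocol file; it isn't verified here):
//
//	"REMOTE_PROC_DOMAIN_GET_XML_DESC"
//	  TrimPrefix "REMOTE_" -> "PROC_DOMAIN_GET_XML_DESC"
//	  fromSnakeToCamel     -> "ProcDomainGetXmlDesc"
//	  fixAbbrevs           -> "ProcDomainGetXMLDesc"
//
// The IsLower guard in fixAbbrevs is what keeps an abbreviation inside a
// larger word intact: in a hypothetical name containing "Ipsum", the "Ip" is
// followed by the lower-case 's', so it is not rewritten to "IPsum".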
// TODO: Move this lexer to its own file?

// eof is returned by the lexer when there's no more input.
const eof = -1

type item struct {
	typ          int
	val          string
	line, column int
}

// String will display lexer items for humans to debug. There are some
// calculations here due to the way goyacc arranges token values; see the
// generated file y.go for an idea what's going on here, but the basic idea is
// that the lower token type values are reserved for single-rune tokens, which
// the lexer reports using the value of the rune itself. Everything else is
// allocated a range of type values above all the possible single-rune values.
func (i item) String() string {
	tokType := i.typ
	if tokType >= yyPrivate {
		if tokType < yyPrivate+len(yyTok2) {
			tokType = yyTok2[tokType-yyPrivate]
		}
	}
	return fmt.Sprintf("%s %q %d:%d", yyTokname(tokType), i.val, i.line, i.column)
}

// Lexer stores the state of this lexer.
type Lexer struct {
	input    string    // the string we're scanning.
	start    int       // start position of the item.
	pos      int       // current position in the input.
	line     int       // the current line (for error reporting).
	column   int       // current position within the current line.
	width    int       // width of the last rune scanned.
	items    chan item // channel of scanned lexer items (lexemes).
	lastItem item      // the last item the lexer handed the parser.
}

// NewLexer will return a new lexer for the passed-in reader.
func NewLexer(rdr io.Reader) (*Lexer, error) {
	l := &Lexer{}

	b, err := ioutil.ReadAll(rdr)
	if err != nil {
		return nil, err
	}
	l.input = string(b)
	l.items = make(chan item)

	return l, nil
}

// Run starts the lexer, and should be called in a goroutine.
func (l *Lexer) Run() {
	for state := lexText; state != nil; {
		state = state(l)
	}
	close(l.items)
}

// emit returns a token to the parser.
func (l *Lexer) emit(t int) {
	l.items <- item{t, l.input[l.start:l.pos], l.line, l.column}
	l.start = l.pos
}

// Lex gets the next token.
func (l *Lexer) Lex(st *yySymType) int {
	s := <-l.items
	l.lastItem = s
	st.val = s.val
	// fmt.Println("Lex returning", s)
	return s.typ
}

// Error is called by the parser when it finds a problem.
func (l *Lexer) Error(s string) {
	fmt.Printf("parse error at %d:%d: %v\n", l.lastItem.line+1, l.lastItem.column+1, s)
	fmt.Printf("error at %q\n", l.lastItem.val)
}

// errorf is used by the lexer to report errors. It inserts an ERROR token into
// the items channel, and sets the state to nil, which stops the lexer's state
// machine.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
	// Note: args must be expanded with '...'; passing the slice itself would
	// format the slice as a single argument.
	l.items <- item{ERROR, fmt.Sprintf(format, args...), l.line, l.column}
	return nil
}

// next returns the rune at the current location, and advances to the next rune
// in the input.
func (l *Lexer) next() (r rune) {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	l.column++
	if r == '\n' {
		l.line++
		l.column = 0
	}
	return r
}

// ignore discards the current text from start to pos.
func (l *Lexer) ignore() {
	l.start = l.pos
}

// backup moves back one rune, and can only be called once per call of next().
func (l *Lexer) backup() {
	l.pos -= l.width
	if l.column > 0 {
		l.column--
	} else {
		l.line--
	}
	l.width = 0
}

// peek looks ahead at the next rune in the stream without consuming it.
func (l *Lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// accept advances to the next rune if it's contained in the string of valid
// runes passed in by the caller.
func (l *Lexer) accept(valid string) bool {
	if strings.IndexRune(valid, l.next()) >= 0 {
		return true
	}
	l.backup()
	return false
}

// acceptRun advances over a number of valid runes, stopping as soon as it hits
// one not on the list.
func (l *Lexer) acceptRun(valid string) {
	for strings.IndexRune(valid, l.next()) >= 0 {
	}
	l.backup()
}

// keyword checks whether the current lexeme is a keyword. If so it returns the
// keyword's token id, otherwise it returns IDENTIFIER.
func (l *Lexer) keyword() int {
	ident := l.input[l.start:l.pos]
	if tok, ok := keywords[ident]; ok {
		return tok
	}
	return IDENTIFIER
}

// oneRuneToken determines whether a rune is a token. If so it returns the
// token id and true, otherwise it returns 0 and false.
func (l *Lexer) oneRuneToken(r rune) (int, bool) {
	if strings.IndexRune(oneRuneTokens, r) >= 0 {
		return int(r), true
	}
	return 0, false
}

// State functions

type stateFn func(*Lexer) stateFn
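// Illustrative token stream (the const line is typical of the protocol file,
// but assumed here rather than quoted): lexing
//
//	const REMOTE_STRING_MAX = 4194304;
//
// emits CONST, IDENTIFIER ("REMOTE_STRING_MAX"), '=', CONSTANT ("4194304"),
// and ';'. The '=' and ';' are single-rune tokens, reported to the parser as
// the integer values of the runes themselves.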
// lexText is the master lex routine. The lexer is started in this state.
func lexText(l *Lexer) stateFn {
	for {
		if strings.HasPrefix(l.input[l.pos:], "/*") {
			return lexBlockComment
		}
		r := l.next()
		if r == eof {
			break
		}
		if unicode.IsSpace(r) {
			l.ignore()
			return lexText
		}
		if l.column == 1 && r == '%' {
			l.backup()
			return lexDirective
		}
		if unicode.IsLetter(r) {
			l.backup()
			return lexIdent
		}
		if unicode.IsNumber(r) || r == '-' {
			l.backup()
			return lexNumber
		}
		if t, isToken := l.oneRuneToken(r); isToken {
			l.emit(t)
		}
	}
	return nil
}

// lexBlockComment is used when we find a comment marker '/*' in the input.
func lexBlockComment(l *Lexer) stateFn {
	for {
		if strings.HasPrefix(l.input[l.pos:], "*/") {
			// Found the end. Advance past the '*/' and discard the comment
			// body.
			l.next()
			l.next()
			l.ignore()
			return lexText
		}
		if l.next() == eof {
			return l.errorf("unterminated block comment")
		}
	}
}

// lexIdent handles identifiers.
func lexIdent(l *Lexer) stateFn {
	for {
		r := l.next()
		if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
			continue
		}
		l.backup()
		break
	}
	// We may have a keyword, so check for that before emitting.
	l.emit(l.keyword())
	return lexText
}

// lexNumber handles decimal and hexadecimal numbers. Decimal numbers may begin
// with a '-'; hex numbers begin with '0x' and do not accept a leading '-'.
func lexNumber(l *Lexer) stateFn {
	// Leading '-' is ok.
	digits := "0123456789"
	neg := l.accept("-")
	if !neg {
		// Allow '0x' for hex numbers, as long as there's not a leading '-'.
		r := l.peek()
		if r == '0' {
			l.next()
			if l.accept("x") {
				digits = "0123456789ABCDEFabcdef"
			}
		}
	}
	// Followed by any number of digits.
	l.acceptRun(digits)
	r := l.peek()
	if unicode.IsLetter(r) {
		l.next()
		return l.errorf("invalid number: %q", l.input[l.start:l.pos])
	}
	l.emit(CONSTANT)
	return lexText
}

// lexDirective handles lines beginning with '%'. These are used to emit C code
// directly to the output file. For now we're ignoring them, but some of the
// constants in the protocol file do depend on values from #included header
// files, so that may need to change.
func lexDirective(l *Lexer) stateFn {
	for {
		r := l.next()
		if r == '\n' {
			l.ignore()
			return lexText
		}
		if r == eof {
			return l.errorf("unterminated directive")
		}
	}
}

//---------------------------------------------------------------------------
// Routines called by the parser's actions.
//---------------------------------------------------------------------------

// StartEnum is called when the parser has found a valid enum.
func StartEnum() {
	// Set the automatic value var to -1; it will be incremented before being
	// assigned to an enum value.
	CurrentEnumVal = -1
}

// AddEnum will add a new enum value to the list.
func AddEnum(name, val string) error {
	ev, err := parseNumber(val)
	if err != nil {
		return fmt.Errorf("invalid enum value %v = %v", name, val)
	}
	return addEnum(name, ev)
}

// AddEnumAutoVal adds an enum to the list, using the automatically-incremented
// value. This is called when the parser finds an enum definition without an
// explicit value.
func AddEnumAutoVal(name string) error {
	CurrentEnumVal++
	return addEnum(name, CurrentEnumVal)
}

// addEnum appends an enum value to the generator's list and records it as the
// most recent value for auto-incrementing.
func addEnum(name string, val int64) error {
	Gen.Enums = append(Gen.Enums, ConstItem{name, fmt.Sprintf("%d", val)})
	CurrentEnumVal = val
	return nil
}
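// Illustrative example of the value bookkeeping above (the enum body is made
// up, not taken from the real protocol file):
//
//	enum example { A, B = 10, C };
//
// StartEnum resets CurrentEnumVal to -1, so A gets 0 via AddEnumAutoVal. B's
// explicit 10 goes through AddEnum, and addEnum records it as the new
// CurrentEnumVal, so C then gets 11.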
// AddConst adds a new constant to the parser's list.
func AddConst(name, val string) error {
	if _, err := parseNumber(val); err != nil {
		return fmt.Errorf("invalid const value %v = %v", name, val)
	}
	Gen.Consts = append(Gen.Consts, ConstItem{name, val})
	return nil
}

// parseNumber converts a decimal or hexadecimal ("0x"-prefixed) string to a
// 64-bit integer, returning an error if the string doesn't parse.
func parseNumber(val string) (int64, error) {
	base := 10
	if strings.HasPrefix(val, "0x") {
		base = 16
		val = val[2:]
	}
	return strconv.ParseInt(val, base, 64)
}
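// Illustrative results (assumed inputs, not from the protocol file):
//
//	parseNumber("207")  -> 207, nil
//	parseNumber("0xff") -> 255, nil
//	parseNumber("-1")   -> -1, nil
//
// A negative hex literal like "-0x10" would fail here, but lexNumber already
// rejects a leading '-' on hex numbers, so parseNumber never sees one.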