diff --git a/internal/constants/constants.go b/internal/constants/constants.go index d321bec..4b7a90c 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -12,7 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package constants provides shared data for the libvirt package. +// Package constants provides shared data for the libvirt package. This file +// includes only things not generated automatically by the parser that runs on +// libvirt's remote_protocol.x file - see constants.gen.go for the generated +// definitions. package constants const ( diff --git a/internal/lvgen/generate.go b/internal/lvgen/generate.go index b534d23..7f6ff4a 100644 --- a/internal/lvgen/generate.go +++ b/internal/lvgen/generate.go @@ -21,7 +21,6 @@ package lvgen import ( "fmt" "io" - "io/ioutil" "os" "strconv" "strings" @@ -61,6 +60,7 @@ type ConstItem struct { Val string } +// Generator holds all the information parsed out of the protocol file. type Generator struct { // Enums holds the list of enums found by the parser. Enums []ConstItem @@ -224,289 +224,6 @@ func fixAbbrevs(s string) string { return s } -// TODO: Move this lexer to its own file? - -// eof is returned by the lexer when there's no more input. -const eof = -1 - -type item struct { - typ int - val string - line, column int -} - -// String will display lexer items for humans to debug. There are some -// calculations here due to the way goyacc arranges token values; see the -// generated file y.go for an idea what's going on here, but the basic idea is -// that the lower token type values are reserved for single-rune tokens, which -// the lexer reports using the value of the rune itself. Everything else is -// allocated a range of type value up above all the possible single-rune values. -func (i item) String() string { - tokType := i.typ - if tokType >= yyPrivate { - if tokType < yyPrivate+len(yyTok2) { - tokType = yyTok2[tokType-yyPrivate] - } - } - rv := fmt.Sprintf("%s %q %d:%d", yyTokname(tokType), i.val, i.line, i.column) - return rv -} - -// Lexer stores the state of this lexer. -type Lexer struct { - input string // the string we're scanning. - start int // start position of the item. - pos int // current position in the input. - line int // the current line (for error reporting). - column int // current position within the current line. - width int // width of the last rune scanned. - items chan item // channel of scanned lexer items (lexemes). - lastItem item // The last item the lexer handed the parser -} - -// NewLexer will return a new lexer for the passed-in reader. -func NewLexer(rdr io.Reader) (*Lexer, error) { - l := &Lexer{} - - b, err := ioutil.ReadAll(rdr) - if err != nil { - return nil, err - } - l.input = string(b) - l.items = make(chan item) - - return l, nil -} - -// Run starts the lexer, and should be called in a goroutine. -func (l *Lexer) Run() { - for state := lexText; state != nil; { - state = state(l) - } - close(l.items) -} - -// emit returns a token to the parser. -func (l *Lexer) emit(t int) { - l.items <- item{t, l.input[l.start:l.pos], l.line, l.column} - l.start = l.pos -} - -// Lex gets the next token. -func (l *Lexer) Lex(st *yySymType) int { - s := <-l.items - l.lastItem = s - st.val = s.val - // fmt.Println("Lex returning", s) - return int(s.typ) -} - -// Error is called by the parser when it finds a problem. 
-func (l *Lexer) Error(s string) { - fmt.Printf("parse error at %d:%d: %v\n", l.lastItem.line+1, l.lastItem.column+1, s) - fmt.Printf("error at %q\n", l.lastItem.val) -} - -// errorf is used by the lexer to report errors. It inserts an ERROR token into -// the items channel, and sets the state to nil, which stops the lexer's state -// machine. -func (l *Lexer) errorf(format string, args ...interface{}) stateFn { - l.items <- item{ERROR, fmt.Sprintf(format, args), l.line, l.column} - return nil -} - -// next returns the rune at the current location, and advances to the next rune -// in the input. -func (l *Lexer) next() (r rune) { - if l.pos >= len(l.input) { - l.width = 0 - return eof - } - r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) - l.pos += l.width - l.column++ - if r == '\n' { - l.line++ - l.column = 0 - } - return r -} - -// ignore discards the current text from start to pos. -func (l *Lexer) ignore() { - l.start = l.pos -} - -// backup moves back one character, but can only be called once per next() call. -func (l *Lexer) backup() { - l.pos -= l.width - if l.column > 0 { - l.column-- - } else { - l.line-- - } - l.width = 0 -} - -// peek looks ahead at the next rune in the stream without consuming it. -func (l *Lexer) peek() rune { - r := l.next() - l.backup() - return r -} - -// accept will advance to the next rune if it's contained in the string of valid -// runes passed in by the caller. -func (l *Lexer) accept(valid string) bool { - if strings.IndexRune(valid, l.next()) >= 0 { - return true - } - l.backup() - return false -} - -// acceptRun advances over a number of valid runes, stopping as soon as it hits -// one not on the list. -func (l *Lexer) acceptRun(valid string) { - for strings.IndexRune(valid, l.next()) >= 0 { - } - l.backup() -} - -// keyword checks whether the current lexeme is a keyword or not. If so it -// returns the keyword's token id, otherwise it returns IDENTIFIER. -func (l *Lexer) keyword() int { - ident := l.input[l.start:l.pos] - tok, ok := keywords[ident] - if ok == true { - return int(tok) - } - return IDENTIFIER -} - -// oneRuneToken determines whether a rune is a token. If so it returns the token -// id and true, otherwise it returns false. -func (l *Lexer) oneRuneToken(r rune) (int, bool) { - if strings.IndexRune(oneRuneTokens, r) >= 0 { - return int(r), true - } - - return 0, false -} - -// State functions -type stateFn func(*Lexer) stateFn - -// lexText is the master lex routine. The lexer is started in this state. -func lexText(l *Lexer) stateFn { - for { - if strings.HasPrefix(l.input[l.pos:], "/*") { - return lexBlockComment - } - r := l.next() - if r == eof { - break - } - if unicode.IsSpace(r) { - l.ignore() - return lexText - } - if l.column == 1 && r == '%' { - l.backup() - return lexDirective - } - if unicode.IsLetter(r) { - l.backup() - return lexIdent - } - if unicode.IsNumber(r) || r == '-' { - l.backup() - return lexNumber - } - if t, isToken := l.oneRuneToken(r); isToken == true { - l.emit(t) - } - } - - return nil -} - -// lexBlockComment is used when we find a comment marker '/*' in the input. -func lexBlockComment(l *Lexer) stateFn { - for { - if strings.HasPrefix(l.input[l.pos:], "*/") { - // Found the end. Advance past the '*/' and discard the comment body. - l.next() - l.next() - l.ignore() - return lexText - } - if l.next() == eof { - return l.errorf("unterminated block comment") - } - } -} - -// lexIdent handles identifiers. 
-func lexIdent(l *Lexer) stateFn { - for { - r := l.next() - if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' { - continue - } - l.backup() - break - } - // We may have a keyword, so check for that before emitting. - l.emit(l.keyword()) - - return lexText -} - -// lexNumber handles decimal and hexadecimal numbers. Decimal numbers may begin -// with a '-'; hex numbers begin with '0x' and do not accept leading '-'. -func lexNumber(l *Lexer) stateFn { - // Leading '-' is ok - digits := "0123456789" - neg := l.accept("-") - if !neg { - // allow '0x' for hex numbers, as long as there's not a leading '-'. - r := l.peek() - if r == '0' { - l.next() - if l.accept("x") { - digits = "0123456789ABCDEFabcdef" - } - } - } - // followed by any number of digits - l.acceptRun(digits) - r := l.peek() - if unicode.IsLetter(r) { - l.next() - return l.errorf("invalid number: %q", l.input[l.start:l.pos]) - } - l.emit(CONSTANT) - return lexText -} - -// lexDirective handles lines beginning with '%'. These are used to emit C code -// directly to the output file. For now we're ignoring them, but some of the -// constants in the protocol file do depend on values from #included header -// files, so that may need to change. -func lexDirective(l *Lexer) stateFn { - for { - r := l.next() - if r == '\n' { - l.ignore() - return lexText - } - if r == eof { - return l.errorf("unterminated directive") - } - } -} - //--------------------------------------------------------------------------- // Routines called by the parser's actions. //--------------------------------------------------------------------------- diff --git a/internal/lvgen/lvlexer.go b/internal/lvgen/lvlexer.go new file mode 100644 index 0000000..7504e43 --- /dev/null +++ b/internal/lvgen/lvlexer.go @@ -0,0 +1,305 @@ +// Copyright 2017 The go-libvirt Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lvgen + +import ( + "fmt" + "io" + "io/ioutil" + "strings" + "unicode" + "unicode/utf8" +) + +// eof is returned by the lexer when there's no more input. +const eof = -1 + +type item struct { + typ int + val string + line, column int +} + +// String will display lexer items for humans to debug. There are some +// calculations here due to the way goyacc arranges token values; see the +// generated file y.go for an idea what's going on here, but the basic idea is +// that the lower token type values are reserved for single-rune tokens, which +// the lexer reports using the value of the rune itself. Everything else is +// allocated a range of type value up above all the possible single-rune values. +func (i item) String() string { + tokType := i.typ + if tokType >= yyPrivate { + if tokType < yyPrivate+len(yyTok2) { + tokType = yyTok2[tokType-yyPrivate] + } + } + rv := fmt.Sprintf("%s %q %d:%d", yyTokname(tokType), i.val, i.line, i.column) + return rv +} + +// Lexer stores the state of this lexer. +type Lexer struct { + input string // the string we're scanning. + start int // start position of the item. 
+	pos int // current position in the input.
+	line int // the current line (for error reporting).
+	column int // current position within the current line.
+	width int // width of the last rune scanned.
+	items chan item // channel of scanned lexer items (lexemes).
+	lastItem item // the last item the lexer handed the parser.
+}
+
+// NewLexer will return a new lexer for the passed-in reader.
+func NewLexer(rdr io.Reader) (*Lexer, error) {
+	l := &Lexer{}
+
+	b, err := ioutil.ReadAll(rdr)
+	if err != nil {
+		return nil, err
+	}
+	l.input = string(b)
+	l.items = make(chan item)
+
+	return l, nil
+}
+
+// Run starts the lexer, and should be called in a goroutine.
+func (l *Lexer) Run() {
+	for state := lexText; state != nil; {
+		state = state(l)
+	}
+	close(l.items)
+}
+
+// emit returns a token to the parser.
+func (l *Lexer) emit(t int) {
+	l.items <- item{t, l.input[l.start:l.pos], l.line, l.column}
+	l.start = l.pos
+}
+
+// Lex gets the next token.
+func (l *Lexer) Lex(st *yySymType) int {
+	s := <-l.items
+	l.lastItem = s
+	st.val = s.val
+	// fmt.Println("Lex returning", s)
+	return int(s.typ)
+}
+
+// Error is called by the parser when it finds a problem.
+func (l *Lexer) Error(s string) {
+	fmt.Printf("parse error at %d:%d: %v\n", l.lastItem.line+1, l.lastItem.column+1, s)
+	fmt.Printf("error at %q\n", l.lastItem.val)
+}
+
+// errorf is used by the lexer to report errors. It inserts an ERROR token into
+// the items channel, and sets the state to nil, which stops the lexer's state
+// machine.
+func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
+	l.items <- item{ERROR, fmt.Sprintf(format, args...), l.line, l.column}
+	return nil
+}
+
+// next returns the rune at the current location, and advances to the next rune
+// in the input.
+func (l *Lexer) next() (r rune) {
+	if l.pos >= len(l.input) {
+		l.width = 0
+		return eof
+	}
+	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+	l.pos += l.width
+	l.column++
+	if r == '\n' {
+		l.line++
+		l.column = 0
+	}
+	return r
+}
+
+// ignore discards the current text from start to pos.
+func (l *Lexer) ignore() {
+	l.start = l.pos
+}
+
+// backup moves back one character, but can only be called once per next() call.
+func (l *Lexer) backup() {
+	l.pos -= l.width
+	if l.column > 0 {
+		l.column--
+	} else {
+		l.line--
+	}
+	l.width = 0
+}
+
+// peek looks ahead at the next rune in the stream without consuming it.
+func (l *Lexer) peek() rune {
+	r := l.next()
+	l.backup()
+	return r
+}
+
+// accept will advance to the next rune if it's contained in the string of valid
+// runes passed in by the caller.
+func (l *Lexer) accept(valid string) bool {
+	if strings.IndexRune(valid, l.next()) >= 0 {
+		return true
+	}
+	l.backup()
+	return false
+}
+
+// acceptRun advances over a number of valid runes, stopping as soon as it hits
+// one not on the list.
+func (l *Lexer) acceptRun(valid string) {
+	for strings.IndexRune(valid, l.next()) >= 0 {
+	}
+	l.backup()
+}
+
+// keyword checks whether the current lexeme is a keyword or not. If so it
+// returns the keyword's token id, otherwise it returns IDENTIFIER.
+func (l *Lexer) keyword() int {
+	ident := l.input[l.start:l.pos]
+	tok, ok := keywords[ident]
+	if ok {
+		return int(tok)
+	}
+	return IDENTIFIER
+}
+
+// oneRuneToken determines whether a rune is a token. If so it returns the token
+// id and true, otherwise it returns false.
+func (l *Lexer) oneRuneToken(r rune) (int, bool) {
+	if strings.IndexRune(oneRuneTokens, r) >= 0 {
+		return int(r), true
+	}
+
+	return 0, false
+}
+
+// State functions
+type stateFn func(*Lexer) stateFn
+
+// lexText is the master lex routine. The lexer is started in this state.
+func lexText(l *Lexer) stateFn {
+	for {
+		if strings.HasPrefix(l.input[l.pos:], "/*") {
+			return lexBlockComment
+		}
+		r := l.next()
+		if r == eof {
+			break
+		}
+		if unicode.IsSpace(r) {
+			l.ignore()
+			return lexText
+		}
+		if l.column == 1 && r == '%' {
+			l.backup()
+			return lexDirective
+		}
+		if unicode.IsLetter(r) {
+			l.backup()
+			return lexIdent
+		}
+		if unicode.IsNumber(r) || r == '-' {
+			l.backup()
+			return lexNumber
+		}
+		if t, isToken := l.oneRuneToken(r); isToken {
+			l.emit(t)
+		}
+	}
+
+	return nil
+}
+
+// lexBlockComment is used when we find a comment marker '/*' in the input.
+func lexBlockComment(l *Lexer) stateFn {
+	for {
+		if strings.HasPrefix(l.input[l.pos:], "*/") {
+			// Found the end. Advance past the '*/' and discard the comment body.
+			l.next()
+			l.next()
+			l.ignore()
+			return lexText
+		}
+		if l.next() == eof {
+			return l.errorf("unterminated block comment")
+		}
+	}
+}
+
+// lexIdent handles identifiers.
+func lexIdent(l *Lexer) stateFn {
+	for {
+		r := l.next()
+		if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
+			continue
+		}
+		l.backup()
+		break
+	}
+	// We may have a keyword, so check for that before emitting.
+	l.emit(l.keyword())
+
+	return lexText
+}
+
+// lexNumber handles decimal and hexadecimal numbers. Decimal numbers may begin
+// with a '-'; hex numbers begin with '0x' and do not accept leading '-'.
+func lexNumber(l *Lexer) stateFn {
+	// Leading '-' is ok
+	digits := "0123456789"
+	neg := l.accept("-")
+	if !neg {
+		// allow '0x' for hex numbers, as long as there's not a leading '-'.
+		r := l.peek()
+		if r == '0' {
+			l.next()
+			if l.accept("x") {
+				digits = "0123456789ABCDEFabcdef"
+			}
+		}
+	}
+	// followed by any number of digits
+	l.acceptRun(digits)
+	r := l.peek()
+	if unicode.IsLetter(r) {
+		l.next()
+		return l.errorf("invalid number: %q", l.input[l.start:l.pos])
+	}
+	l.emit(CONSTANT)
+	return lexText
+}
+
+// lexDirective handles lines beginning with '%'. These are used to emit C code
+// directly to the output file. For now we're ignoring them, but some of the
+// constants in the protocol file do depend on values from #included header
+// files, so that may need to change.
+func lexDirective(l *Lexer) stateFn {
+	for {
+		r := l.next()
+		if r == '\n' {
+			l.ignore()
+			return lexText
+		}
+		if r == eof {
+			return l.errorf("unterminated directive")
+		}
+	}
+}
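
The moved lexer follows the channel-plus-state-function pattern popularized by Rob Pike's "Lexical Scanning in Go" talk: Run walks stateFn values until one returns nil, emit sends lexemes down the items channel, and the goyacc-generated parser consumes them from the other end through Lex. The sketch below shows that wiring; it assumes the generated y.go (yyParse, yySymType, and the token constants) is present in the package, since goyacc produces that file separately and it is not part of this diff, and parseProto is a hypothetical helper name used only for illustration.

```go
package lvgen

import (
	"fmt"
	"os"
)

// parseProto is a hypothetical illustration of how the lexer and the
// goyacc-generated parser cooperate; the real entry point in this package
// may differ.
func parseProto(path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	lexer, err := NewLexer(f)
	if err != nil {
		return err
	}

	// Run blocks sending items on the lexer's channel, so it has to execute
	// concurrently with the parser draining that channel via Lex.
	go lexer.Run()

	// yyParse pulls tokens by calling lexer.Lex and reports syntax errors
	// through lexer.Error; a non-zero result means the parse failed.
	if rv := yyParse(lexer); rv != 0 {
		return fmt.Errorf("failed to parse %v: yyParse returned %v", path, rv)
	}
	return nil
}
```

Note the error path: errorf emits an ERROR token and returns a nil stateFn, which ends Run's loop and closes the items channel, so a lexing failure surfaces to the parser instead of hanging it.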