808 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			808 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Go
		
	
	
	
| // TOML lexer.
 | |
| //
 | |
| // Written using the principles developed by Rob Pike in
 | |
| // http://www.youtube.com/watch?v=HxaD_trXwRE
 | |
| 
 | |
| package toml
 | |
| 
 | |
| import (
 | |
| 	"bytes"
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"regexp"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| )
 | |
| 
 | |
// dateRegexp matches every date/time form accepted by TOML.
// It is compiled once in init() below (it also matches the empty string,
// so callers must check that the whole match is non-empty).
var dateRegexp *regexp.Regexp

// Define state functions

// tomlLexStateFn is one state of the lexer state machine: it consumes
// input, emits tokens, and returns the next state (nil ends the run loop).
type tomlLexStateFn func() tomlLexStateFn
 | |
| 
 | |
// Define lexer

// tomlLexer holds the full state of the scanner. Tokens are accumulated
// in `tokens`; the pending token is the rune span
// [currentTokenStart, currentTokenStop) of `input`.
type tomlLexer struct {
	inputIdx          int    // index of the next rune to read
	input             []rune // Textual source
	currentTokenStart int    // rune offset where the pending token begins
	currentTokenStop  int    // rune offset one past the pending token's end
	tokens            []token // tokens emitted so far
	brackets          []rune  // stack of open '[' / '{' used for context decisions
	line              int // line of the pending token's start (1-based)
	col               int // column of the pending token's start (1-based)
	endbufferLine     int // line of the read cursor
	endbufferCol      int // column of the read cursor
}
 | |
| 
 | |
| // Basic read operations on input
 | |
| 
 | |
| func (l *tomlLexer) read() rune {
 | |
| 	r := l.peek()
 | |
| 	if r == '\n' {
 | |
| 		l.endbufferLine++
 | |
| 		l.endbufferCol = 1
 | |
| 	} else {
 | |
| 		l.endbufferCol++
 | |
| 	}
 | |
| 	l.inputIdx++
 | |
| 	return r
 | |
| }
 | |
| 
 | |
| func (l *tomlLexer) next() rune {
 | |
| 	r := l.read()
 | |
| 
 | |
| 	if r != eof {
 | |
| 		l.currentTokenStop++
 | |
| 	}
 | |
| 	return r
 | |
| }
 | |
| 
 | |
// ignore discards the pending token text: the next token will begin at the
// current read cursor, whose line/column become the new token position.
func (l *tomlLexer) ignore() {
	l.currentTokenStart = l.currentTokenStop
	l.line = l.endbufferLine
	l.col = l.endbufferCol
}
 | |
| 
 | |
// skip consumes one rune and immediately discards it, so it will not be
// part of any emitted token.
func (l *tomlLexer) skip() {
	l.next()
	l.ignore()
}
 | |
| 
 | |
| func (l *tomlLexer) fastForward(n int) {
 | |
| 	for i := 0; i < n; i++ {
 | |
| 		l.next()
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (l *tomlLexer) emitWithValue(t tokenType, value string) {
 | |
| 	l.tokens = append(l.tokens, token{
 | |
| 		Position: Position{l.line, l.col},
 | |
| 		typ:      t,
 | |
| 		val:      value,
 | |
| 	})
 | |
| 	l.ignore()
 | |
| }
 | |
| 
 | |
// emit appends a token of type t whose value is the pending input span
// [currentTokenStart, currentTokenStop).
func (l *tomlLexer) emit(t tokenType) {
	l.emitWithValue(t, string(l.input[l.currentTokenStart:l.currentTokenStop]))
}
 | |
| 
 | |
| func (l *tomlLexer) peek() rune {
 | |
| 	if l.inputIdx >= len(l.input) {
 | |
| 		return eof
 | |
| 	}
 | |
| 	return l.input[l.inputIdx]
 | |
| }
 | |
| 
 | |
| func (l *tomlLexer) peekString(size int) string {
 | |
| 	maxIdx := len(l.input)
 | |
| 	upperIdx := l.inputIdx + size // FIXME: potential overflow
 | |
| 	if upperIdx > maxIdx {
 | |
| 		upperIdx = maxIdx
 | |
| 	}
 | |
| 	return string(l.input[l.inputIdx:upperIdx])
 | |
| }
 | |
| 
 | |
| func (l *tomlLexer) follow(next string) bool {
 | |
| 	return next == l.peekString(len(next))
 | |
| }
 | |
| 
 | |
// Error management

// errorf appends a tokenError carrying the formatted message, positioned at
// the pending token, and returns nil to stop the state machine.
func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
	l.tokens = append(l.tokens, token{
		Position: Position{l.line, l.col},
		typ:      tokenError,
		val:      fmt.Sprintf(format, args...),
	})
	return nil
}
 | |
| 
 | |
// State functions

// lexVoid scans the top-level / key-position context: table headers, keys,
// comments, '=' and blank lines. It emits tokenEOF at end of input.
func (l *tomlLexer) lexVoid() tomlLexStateFn {
	for {
		next := l.peek()
		switch next {
		case '}': // after '{'
			return l.lexRightCurlyBrace
		case '[':
			return l.lexTableKey
		case '#':
			return l.lexComment(l.lexVoid)
		case '=':
			return l.lexEqual
		case '\r':
			fallthrough
		case '\n':
			l.skip()
			continue
		}

		if isSpace(next) {
			l.skip()
		}

		// NOTE: after skipping a space, `next` is stale, but a space is
		// neither a key-start char nor eof, so the loop simply re-peeks.
		if isKeyStartChar(next) {
			return l.lexKey
		}

		if next == eof {
			l.next()
			break
		}
	}

	l.emit(tokenEOF)
	return nil
}
 | |
| 
 | |
// lexRvalue scans a value position (right of '=' or inside an array):
// delimiters, strings, booleans, inf/nan, dates/times and numbers.
// A newline returns to lexVoid unless the innermost open bracket is '['
// (arrays may span multiple lines). Emits tokenEOF at end of input.
func (l *tomlLexer) lexRvalue() tomlLexStateFn {
	for {
		next := l.peek()
		switch next {
		case '.':
			return l.errorf("cannot start float with a dot")
		case '=':
			return l.lexEqual
		case '[':
			return l.lexLeftBracket
		case ']':
			return l.lexRightBracket
		case '{':
			return l.lexLeftCurlyBrace
		case '}':
			return l.lexRightCurlyBrace
		case '#':
			return l.lexComment(l.lexRvalue)
		case '"':
			return l.lexString
		case '\'':
			return l.lexLiteralString
		case ',':
			return l.lexComma
		case '\r':
			fallthrough
		case '\n':
			l.skip()
			if len(l.brackets) > 0 && l.brackets[len(l.brackets)-1] == '[' {
				return l.lexRvalue
			}
			return l.lexVoid
		}

		if l.follow("true") {
			return l.lexTrue
		}

		if l.follow("false") {
			return l.lexFalse
		}

		if l.follow("inf") {
			return l.lexInf
		}

		if l.follow("nan") {
			return l.lexNan
		}

		if isSpace(next) {
			l.skip()
			continue
		}

		if next == eof {
			l.next()
			break
		}

		// 35 runes is enough to cover the longest date/time form the
		// regexp can match; dateRegexp is anchored and may match "".
		possibleDate := l.peekString(35)
		dateSubmatches := dateRegexp.FindStringSubmatch(possibleDate)
		if dateSubmatches != nil && dateSubmatches[0] != "" {
			l.fastForward(len(dateSubmatches[0]))
			if dateSubmatches[2] == "" { // no timezone information => local date
				return l.lexLocalDate
			}
			return l.lexDate
		}

		if next == '+' || next == '-' || isDigit(next) {
			return l.lexNumber
		}

		return l.errorf("no value can start with %c", next)
	}

	l.emit(tokenEOF)
	return nil
}
 | |
| 
 | |
// lexLeftCurlyBrace emits '{' (inline table start), pushes it on the
// bracket stack, and switches to key context.
func (l *tomlLexer) lexLeftCurlyBrace() tomlLexStateFn {
	l.next()
	l.emit(tokenLeftCurlyBrace)
	l.brackets = append(l.brackets, '{')
	return l.lexVoid
}
 | |
| 
 | |
| func (l *tomlLexer) lexRightCurlyBrace() tomlLexStateFn {
 | |
| 	l.next()
 | |
| 	l.emit(tokenRightCurlyBrace)
 | |
| 	if len(l.brackets) == 0 || l.brackets[len(l.brackets)-1] != '{' {
 | |
| 		return l.errorf("cannot have '}' here")
 | |
| 	}
 | |
| 	l.brackets = l.brackets[:len(l.brackets)-1]
 | |
| 	return l.lexRvalue
 | |
| }
 | |
| 
 | |
// lexDate emits the already-consumed offset date-time as tokenDate.
func (l *tomlLexer) lexDate() tomlLexStateFn {
	l.emit(tokenDate)
	return l.lexRvalue
}
 | |
| 
 | |
// lexLocalDate emits the already-consumed timezone-less date/time as
// tokenLocalDate.
func (l *tomlLexer) lexLocalDate() tomlLexStateFn {
	l.emit(tokenLocalDate)
	return l.lexRvalue
}
 | |
| 
 | |
// lexTrue consumes the literal "true" and emits tokenTrue.
func (l *tomlLexer) lexTrue() tomlLexStateFn {
	l.fastForward(4)
	l.emit(tokenTrue)
	return l.lexRvalue
}
 | |
| 
 | |
// lexFalse consumes the literal "false" and emits tokenFalse.
func (l *tomlLexer) lexFalse() tomlLexStateFn {
	l.fastForward(5)
	l.emit(tokenFalse)
	return l.lexRvalue
}
 | |
| 
 | |
// lexInf consumes the literal "inf" and emits tokenInf. A preceding sign,
// if any, is already part of the pending token (see lexNumber).
func (l *tomlLexer) lexInf() tomlLexStateFn {
	l.fastForward(3)
	l.emit(tokenInf)
	return l.lexRvalue
}
 | |
| 
 | |
// lexNan consumes the literal "nan" and emits tokenNan. A preceding sign,
// if any, is already part of the pending token (see lexNumber).
func (l *tomlLexer) lexNan() tomlLexStateFn {
	l.fastForward(3)
	l.emit(tokenNan)
	return l.lexRvalue
}
 | |
| 
 | |
// lexEqual emits '=' and switches to value context.
func (l *tomlLexer) lexEqual() tomlLexStateFn {
	l.next()
	l.emit(tokenEqual)
	return l.lexRvalue
}
 | |
| 
 | |
| func (l *tomlLexer) lexComma() tomlLexStateFn {
 | |
| 	l.next()
 | |
| 	l.emit(tokenComma)
 | |
| 	if len(l.brackets) > 0 && l.brackets[len(l.brackets)-1] == '{' {
 | |
| 		return l.lexVoid
 | |
| 	}
 | |
| 	return l.lexRvalue
 | |
| }
 | |
| 
 | |
| // Parse the key and emits its value without escape sequences.
 | |
| // bare keys, basic string keys and literal string keys are supported.
 | |
| func (l *tomlLexer) lexKey() tomlLexStateFn {
 | |
| 	var sb strings.Builder
 | |
| 
 | |
| 	for r := l.peek(); isKeyChar(r) || r == '\n' || r == '\r'; r = l.peek() {
 | |
| 		if r == '"' {
 | |
| 			l.next()
 | |
| 			str, err := l.lexStringAsString(`"`, false, true)
 | |
| 			if err != nil {
 | |
| 				return l.errorf(err.Error())
 | |
| 			}
 | |
| 			sb.WriteString("\"")
 | |
| 			sb.WriteString(str)
 | |
| 			sb.WriteString("\"")
 | |
| 			l.next()
 | |
| 			continue
 | |
| 		} else if r == '\'' {
 | |
| 			l.next()
 | |
| 			str, err := l.lexLiteralStringAsString(`'`, false)
 | |
| 			if err != nil {
 | |
| 				return l.errorf(err.Error())
 | |
| 			}
 | |
| 			sb.WriteString("'")
 | |
| 			sb.WriteString(str)
 | |
| 			sb.WriteString("'")
 | |
| 			l.next()
 | |
| 			continue
 | |
| 		} else if r == '\n' {
 | |
| 			return l.errorf("keys cannot contain new lines")
 | |
| 		} else if isSpace(r) {
 | |
| 			var str strings.Builder
 | |
| 			str.WriteString(" ")
 | |
| 
 | |
| 			// skip trailing whitespace
 | |
| 			l.next()
 | |
| 			for r = l.peek(); isSpace(r); r = l.peek() {
 | |
| 				str.WriteRune(r)
 | |
| 				l.next()
 | |
| 			}
 | |
| 			// break loop if not a dot
 | |
| 			if r != '.' {
 | |
| 				break
 | |
| 			}
 | |
| 			str.WriteString(".")
 | |
| 			// skip trailing whitespace after dot
 | |
| 			l.next()
 | |
| 			for r = l.peek(); isSpace(r); r = l.peek() {
 | |
| 				str.WriteRune(r)
 | |
| 				l.next()
 | |
| 			}
 | |
| 			sb.WriteString(str.String())
 | |
| 			continue
 | |
| 		} else if r == '.' {
 | |
| 			// skip
 | |
| 		} else if !isValidBareChar(r) {
 | |
| 			return l.errorf("keys cannot contain %c character", r)
 | |
| 		}
 | |
| 		sb.WriteRune(r)
 | |
| 		l.next()
 | |
| 	}
 | |
| 	l.emitWithValue(tokenKey, sb.String())
 | |
| 	return l.lexVoid
 | |
| }
 | |
| 
 | |
| func (l *tomlLexer) lexComment(previousState tomlLexStateFn) tomlLexStateFn {
 | |
| 	return func() tomlLexStateFn {
 | |
| 		for next := l.peek(); next != '\n' && next != eof; next = l.peek() {
 | |
| 			if next == '\r' && l.follow("\r\n") {
 | |
| 				break
 | |
| 			}
 | |
| 			l.next()
 | |
| 		}
 | |
| 		l.ignore()
 | |
| 		return previousState
 | |
| 	}
 | |
| }
 | |
| 
 | |
// lexLeftBracket emits '[' (array start, in value position) and pushes it
// on the bracket stack.
func (l *tomlLexer) lexLeftBracket() tomlLexStateFn {
	l.next()
	l.emit(tokenLeftBracket)
	l.brackets = append(l.brackets, '[')
	return l.lexRvalue
}
 | |
| 
 | |
// lexLiteralStringAsString reads a literal string up to (not including)
// the terminator ("'" or "'''") and returns its raw content — no escape
// processing is performed. When discardLeadingNewLine is set (multiline
// form), a newline immediately after the opening quotes is dropped.
// Returns an error if eof is reached before the terminator.
func (l *tomlLexer) lexLiteralStringAsString(terminator string, discardLeadingNewLine bool) (string, error) {
	var sb strings.Builder

	if discardLeadingNewLine {
		if l.follow("\r\n") {
			l.skip()
			l.skip()
		} else if l.peek() == '\n' {
			l.skip()
		}
	}

	// find end of string
	for {
		if l.follow(terminator) {
			return sb.String(), nil
		}

		next := l.peek()
		if next == eof {
			break
		}
		sb.WriteRune(l.next())
	}

	return "", errors.New("unclosed string")
}
 | |
| 
 | |
| func (l *tomlLexer) lexLiteralString() tomlLexStateFn {
 | |
| 	l.skip()
 | |
| 
 | |
| 	// handle special case for triple-quote
 | |
| 	terminator := "'"
 | |
| 	discardLeadingNewLine := false
 | |
| 	if l.follow("''") {
 | |
| 		l.skip()
 | |
| 		l.skip()
 | |
| 		terminator = "'''"
 | |
| 		discardLeadingNewLine = true
 | |
| 	}
 | |
| 
 | |
| 	str, err := l.lexLiteralStringAsString(terminator, discardLeadingNewLine)
 | |
| 	if err != nil {
 | |
| 		return l.errorf(err.Error())
 | |
| 	}
 | |
| 
 | |
| 	l.emitWithValue(tokenString, str)
 | |
| 	l.fastForward(len(terminator))
 | |
| 	l.ignore()
 | |
| 	return l.lexRvalue
 | |
| }
 | |
| 
 | |
// Lex a string and return the results as a string.
// Terminator is the substring indicating the end of the token.
// The resulting string does not include the terminator.
//
// Escape sequences (\n, \t, \uXXXX, \UXXXXXXXX, ...) are resolved into the
// returned value. A backslash followed by whitespace swallows all
// subsequent whitespace (the TOML "line-ending backslash").
// NOTE(review): this also accepts "\ " or "\<tab>" mid-line — confirm this
// leniency is intended.
// When discardLeadingNewLine is set (multiline form), a newline right after
// the opening quotes is dropped. acceptNewLines permits raw \n/\r in the
// body (multiline strings); otherwise they are rejected as unescaped
// control characters.
func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine, acceptNewLines bool) (string, error) {
	var sb strings.Builder

	if discardLeadingNewLine {
		if l.follow("\r\n") {
			l.skip()
			l.skip()
		} else if l.peek() == '\n' {
			l.skip()
		}
	}

	for {
		if l.follow(terminator) {
			return sb.String(), nil
		}

		if l.follow("\\") {
			l.next()
			switch l.peek() {
			case '\r':
				fallthrough
			case '\n':
				fallthrough
			case '\t':
				fallthrough
			case ' ':
				// skip all whitespace chars following backslash
				for strings.ContainsRune("\r\n\t ", l.peek()) {
					l.next()
				}
			case '"':
				sb.WriteString("\"")
				l.next()
			case 'n':
				sb.WriteString("\n")
				l.next()
			case 'b':
				sb.WriteString("\b")
				l.next()
			case 'f':
				sb.WriteString("\f")
				l.next()
			case '/':
				sb.WriteString("/")
				l.next()
			case 't':
				sb.WriteString("\t")
				l.next()
			case 'r':
				sb.WriteString("\r")
				l.next()
			case '\\':
				sb.WriteString("\\")
				l.next()
			case 'u':
				// \uXXXX: exactly four hex digits.
				l.next()
				var code strings.Builder
				for i := 0; i < 4; i++ {
					c := l.peek()
					if !isHexDigit(c) {
						return "", errors.New("unfinished unicode escape")
					}
					l.next()
					code.WriteRune(c)
				}
				intcode, err := strconv.ParseInt(code.String(), 16, 32)
				if err != nil {
					return "", errors.New("invalid unicode escape: \\u" + code.String())
				}
				sb.WriteRune(rune(intcode))
			case 'U':
				// \UXXXXXXXX: exactly eight hex digits.
				l.next()
				var code strings.Builder
				for i := 0; i < 8; i++ {
					c := l.peek()
					if !isHexDigit(c) {
						return "", errors.New("unfinished unicode escape")
					}
					l.next()
					code.WriteRune(c)
				}
				intcode, err := strconv.ParseInt(code.String(), 16, 64)
				if err != nil {
					return "", errors.New("invalid unicode escape: \\U" + code.String())
				}
				sb.WriteRune(rune(intcode))
			default:
				return "", errors.New("invalid escape sequence: \\" + string(l.peek()))
			}
		} else {
			r := l.peek()

			// Raw control characters (except tab, and newlines when
			// allowed) must be escaped per the TOML spec.
			if 0x00 <= r && r <= 0x1F && r != '\t' && !(acceptNewLines && (r == '\n' || r == '\r')) {
				return "", fmt.Errorf("unescaped control character %U", r)
			}
			l.next()
			sb.WriteRune(r)
		}

		if l.peek() == eof {
			break
		}
	}

	return "", errors.New("unclosed string")
}
 | |
| 
 | |
| func (l *tomlLexer) lexString() tomlLexStateFn {
 | |
| 	l.skip()
 | |
| 
 | |
| 	// handle special case for triple-quote
 | |
| 	terminator := `"`
 | |
| 	discardLeadingNewLine := false
 | |
| 	acceptNewLines := false
 | |
| 	if l.follow(`""`) {
 | |
| 		l.skip()
 | |
| 		l.skip()
 | |
| 		terminator = `"""`
 | |
| 		discardLeadingNewLine = true
 | |
| 		acceptNewLines = true
 | |
| 	}
 | |
| 
 | |
| 	str, err := l.lexStringAsString(terminator, discardLeadingNewLine, acceptNewLines)
 | |
| 	if err != nil {
 | |
| 		return l.errorf(err.Error())
 | |
| 	}
 | |
| 
 | |
| 	l.emitWithValue(tokenString, str)
 | |
| 	l.fastForward(len(terminator))
 | |
| 	l.ignore()
 | |
| 	return l.lexRvalue
 | |
| }
 | |
| 
 | |
| func (l *tomlLexer) lexTableKey() tomlLexStateFn {
 | |
| 	l.next()
 | |
| 
 | |
| 	if l.peek() == '[' {
 | |
| 		// token '[[' signifies an array of tables
 | |
| 		l.next()
 | |
| 		l.emit(tokenDoubleLeftBracket)
 | |
| 		return l.lexInsideTableArrayKey
 | |
| 	}
 | |
| 	// vanilla table key
 | |
| 	l.emit(tokenLeftBracket)
 | |
| 	return l.lexInsideTableKey
 | |
| }
 | |
| 
 | |
| // Parse the key till "]]", but only bare keys are supported
 | |
| func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
 | |
| 	for r := l.peek(); r != eof; r = l.peek() {
 | |
| 		switch r {
 | |
| 		case ']':
 | |
| 			if l.currentTokenStop > l.currentTokenStart {
 | |
| 				l.emit(tokenKeyGroupArray)
 | |
| 			}
 | |
| 			l.next()
 | |
| 			if l.peek() != ']' {
 | |
| 				break
 | |
| 			}
 | |
| 			l.next()
 | |
| 			l.emit(tokenDoubleRightBracket)
 | |
| 			return l.lexVoid
 | |
| 		case '[':
 | |
| 			return l.errorf("table array key cannot contain ']'")
 | |
| 		default:
 | |
| 			l.next()
 | |
| 		}
 | |
| 	}
 | |
| 	return l.errorf("unclosed table array key")
 | |
| }
 | |
| 
 | |
| // Parse the key till "]" but only bare keys are supported
 | |
| func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
 | |
| 	for r := l.peek(); r != eof; r = l.peek() {
 | |
| 		switch r {
 | |
| 		case ']':
 | |
| 			if l.currentTokenStop > l.currentTokenStart {
 | |
| 				l.emit(tokenKeyGroup)
 | |
| 			}
 | |
| 			l.next()
 | |
| 			l.emit(tokenRightBracket)
 | |
| 			return l.lexVoid
 | |
| 		case '[':
 | |
| 			return l.errorf("table key cannot contain ']'")
 | |
| 		default:
 | |
| 			l.next()
 | |
| 		}
 | |
| 	}
 | |
| 	return l.errorf("unclosed table key")
 | |
| }
 | |
| 
 | |
| func (l *tomlLexer) lexRightBracket() tomlLexStateFn {
 | |
| 	l.next()
 | |
| 	l.emit(tokenRightBracket)
 | |
| 	if len(l.brackets) == 0 || l.brackets[len(l.brackets)-1] != '[' {
 | |
| 		return l.errorf("cannot have ']' here")
 | |
| 	}
 | |
| 	l.brackets = l.brackets[:len(l.brackets)-1]
 | |
| 	return l.lexRvalue
 | |
| }
 | |
| 
 | |
// validRuneFn reports whether a rune is a valid digit (or '_' separator)
// for a given integer base.
type validRuneFn func(r rune) bool
 | |
// isValidHexRune reports whether r is a hexadecimal digit or the '_'
// digit separator.
func isValidHexRune(r rune) bool {
	switch {
	case '0' <= r && r <= '9',
		'a' <= r && r <= 'f',
		'A' <= r && r <= 'F',
		r == '_':
		return true
	default:
		return false
	}
}
 | |
| 
 | |
// isValidOctalRune reports whether r is an octal digit or the '_'
// digit separator.
func isValidOctalRune(r rune) bool {
	return r == '_' || ('0' <= r && r <= '7')
}
 | |
| 
 | |
// isValidBinaryRune reports whether r is a binary digit or the '_'
// digit separator.
func isValidBinaryRune(r rune) bool {
	switch r {
	case '0', '1', '_':
		return true
	}
	return false
}
 | |
| 
 | |
// lexNumber lexes an integer or float. It handles the 0x/0o/0b prefixed
// integer bases, signed inf/nan, '_' digit separators, decimal points and
// exponents. Emits tokenInteger or tokenFloat (float iff a '.' or an
// exponent was seen).
func (l *tomlLexer) lexNumber() tomlLexStateFn {
	r := l.peek()

	// Prefixed bases: "0x", "0o", "0b".
	if r == '0' {
		follow := l.peekString(2)
		if len(follow) == 2 {
			var isValidRune validRuneFn
			switch follow[1] {
			case 'x':
				isValidRune = isValidHexRune
			case 'o':
				isValidRune = isValidOctalRune
			case 'b':
				isValidRune = isValidBinaryRune
			default:
				// Any other letter after '0' is an unknown base; digits
				// and punctuation fall through to the decimal path below.
				if follow[1] >= 'a' && follow[1] <= 'z' || follow[1] >= 'A' && follow[1] <= 'Z' {
					return l.errorf("unknown number base: %s. possible options are x (hex) o (octal) b (binary)", string(follow[1]))
				}
			}

			if isValidRune != nil {
				// Consume the "0x"/"0o"/"0b" prefix, then the digits.
				l.next()
				l.next()
				digitSeen := false
				for {
					next := l.peek()
					if !isValidRune(next) {
						break
					}
					digitSeen = true
					l.next()
				}

				if !digitSeen {
					return l.errorf("number needs at least one digit")
				}

				l.emit(tokenInteger)

				return l.lexRvalue
			}
		}
	}

	// Optional sign; signed inf/nan keep the sign in the pending token.
	if r == '+' || r == '-' {
		l.next()
		if l.follow("inf") {
			return l.lexInf
		}
		if l.follow("nan") {
			return l.lexNan
		}
	}

	pointSeen := false
	expSeen := false
	digitSeen := false
	for {
		next := l.peek()
		if next == '.' {
			if pointSeen {
				return l.errorf("cannot have two dots in one float")
			}
			l.next()
			if !isDigit(l.peek()) {
				return l.errorf("float cannot end with a dot")
			}
			pointSeen = true
		} else if next == 'e' || next == 'E' {
			expSeen = true
			l.next()
			r := l.peek()
			if r == '+' || r == '-' {
				l.next()
			}
		} else if isDigit(next) {
			digitSeen = true
			l.next()
		} else if next == '_' {
			l.next()
		} else {
			break
		}
		// A '.' before any digit (e.g. ".5") is rejected.
		if pointSeen && !digitSeen {
			return l.errorf("cannot start float with a dot")
		}
	}

	if !digitSeen {
		return l.errorf("no digit in that number")
	}
	if pointSeen || expSeen {
		l.emit(tokenFloat)
	} else {
		l.emit(tokenInteger)
	}
	return l.lexRvalue
}
 | |
| 
 | |
| func (l *tomlLexer) run() {
 | |
| 	for state := l.lexVoid; state != nil; {
 | |
| 		state = state()
 | |
| 	}
 | |
| }
 | |
| 
 | |
func init() {
	// Regexp for all date/time formats supported by TOML.
	// Group 1: nano precision
	// Group 2: timezone
	//
	// The pattern is anchored at the start only, and /!\ also matches the
	// empty string — callers must check that the whole match is non-empty
	// (see lexRvalue).
	//
	// Example matches:
	// 1979-05-27T07:32:00Z
	// 1979-05-27T00:32:00-07:00
	// 1979-05-27T00:32:00.999999-07:00
	// 1979-05-27 07:32:00Z
	// 1979-05-27 00:32:00-07:00
	// 1979-05-27 00:32:00.999999-07:00
	// 1979-05-27T07:32:00
	// 1979-05-27T00:32:00.999999
	// 1979-05-27 07:32:00
	// 1979-05-27 00:32:00.999999
	// 1979-05-27
	// 07:32:00
	// 00:32:00.999999
	dateRegexp = regexp.MustCompile(`^(?:\d{1,4}-\d{2}-\d{2})?(?:[T ]?\d{2}:\d{2}:\d{2}(\.\d{1,9})?(Z|[+-]\d{2}:\d{2})?)?`)
}
 | |
| 
 | |
| // Entry point
 | |
| func lexToml(inputBytes []byte) []token {
 | |
| 	runes := bytes.Runes(inputBytes)
 | |
| 	l := &tomlLexer{
 | |
| 		input:         runes,
 | |
| 		tokens:        make([]token, 0, 256),
 | |
| 		line:          1,
 | |
| 		col:           1,
 | |
| 		endbufferLine: 1,
 | |
| 		endbufferCol:  1,
 | |
| 	}
 | |
| 	l.run()
 | |
| 	return l.tokens
 | |
| }
 |