vendor/github.com/hashicorp/hil/scanner/scanner.go - cloudstack-terraform-provider - Git at Google

 package scanner

 import (
 	"unicode"
 	"unicode/utf8"

 	"github.com/hashicorp/hil/ast"
 )

 // Scan starts tokenizing the given input string in a new goroutine and
 // returns the channel on which the resulting Tokens are received.
 //
 // The scanner only partitions the input into meaningful parts. It performs
 // no transformation of the raw text, so the caller is responsible for any
 // further interpretation, such as converting INTEGER tokens into real ints
 // or processing escape sequences inside LITERAL or STRING tokens.
 //
 // Token contents are slices of the original string.
 //
 // The channel is unbuffered, so the scanning goroutine only makes progress
 // while the caller keeps receiving from it.
 //
 // Pass ast.InitPos as startPos unless this interpolation string is embedded
 // in a larger file and the position of its first character within that file
 // is known.
 func Scan(s string, startPos ast.Pos) <-chan *Token {
 	tokens := make(chan *Token)
 	go scan(s, tokens, startPos)
 	return tokens
 }

 // scan is the goroutine body started by Scan. It walks the input string,
 // sends every token it finds on ch, and closes ch when it is done.
 //
 // Scanning alternates between two modes: literal/string scanning (handled by
 // scanLiteral) and scanning of the tokens inside a ${ ... } sequence (handled
 // by scanInterpolationToken). It stops at EOF, or right after the first
 // INVALID token, in which case a synthetic EOF token is sent as well.
 //
 // pos is the source position of the first byte of s.
 func scan(s string, ch chan<- *Token, pos ast.Pos) {
 	// Every exit path below is a plain return, so closing here covers
 	// them all.
 	defer close(ch)

 	// 'remain' starts off as the whole string but we gradually
 	// slice off the front of it as we work our way through.
 	remain := s

 	// nesting keeps track of how many ${ .. } sequences we are
 	// inside, so we can recognize the minor differences in syntax
 	// between outer string literals (LITERAL tokens) and quoted
 	// string literals (STRING tokens).
 	nesting := 0

 	for {
 		startPos := pos

 		// Literal string processing first, since the beginning of
 		// a string is always outside of an interpolation sequence.
 		literalVal, terminator := scanLiteral(remain, pos, nesting > 0)

 		if len(literalVal) > 0 {
 			litType := LITERAL
 			if nesting > 0 {
 				litType = STRING
 			}
 			ch <- &Token{
 				Type:    litType,
 				Content: literalVal,
 				Pos:     startPos,
 			}
 			remain = remain[len(literalVal):]
 		}

 		ch <- terminator
 		remain = remain[len(terminator.Content):]

 		// Advance past the terminator. Columns count runes and reset at
 		// each newline. The fixed terminators ("${" and the closing quote)
 		// are plain ASCII, but an INVALID terminator carries the whole
 		// unterminated string, which may contain multi-byte characters and
 		// newlines, so a byte count would misplace the synthetic EOF.
 		pos = terminator.Pos
 		for _, r := range terminator.Content {
 			if r == '\n' {
 				pos.Line++
 				pos.Column = 1
 			} else {
 				pos.Column++
 			}
 		}

 		switch terminator.Type {
 		case INVALID:
 			// Synthetic EOF after invalid token, since further scanning
 			// is likely to just produce more garbage.
 			ch <- &Token{
 				Type:    EOF,
 				Content: "",
 				Pos:     pos,
 			}
 			return
 		case EOF:
 			// All done!
 			return
 		case BEGIN:
 			nesting++
 		case CQUOTE:
 			// nothing special to do
 		default:
 			// Should never happen
 			panic("invalid string/literal terminator")
 		}

 		// Now we do the processing of the insides of ${ .. } sequences.
 		// This loop terminates when we encounter either a closing } or
 		// an opening ", which will cause us to return to literal processing.
 	Interpolation:
 		for {
 			token, size, newPos := scanInterpolationToken(remain, pos)
 			ch <- token
 			remain = remain[size:]
 			pos = newPos

 			switch token.Type {
 			case INVALID:
 				// Synthetic EOF after invalid token, since further scanning
 				// is likely to just produce more garbage.
 				ch <- &Token{
 					Type:    EOF,
 					Content: "",
 					Pos:     pos,
 				}
 				return
 			case EOF:
 				// All done
 				// (though a syntax error that we'll catch in the parser)
 				return
 			case END:
 				nesting--
 				if nesting < 0 {
 					// Can happen if there are unbalanced ${ and } sequences
 					// in the input, which we'll catch in the parser.
 					nesting = 0
 				}
 				break Interpolation
 			case OQUOTE:
 				// Beginning of nested quoted string
 				break Interpolation
 			}
 		}
 	}
 }

 // scanInterpolationToken reads one token from the start of s, which is
 // expected to be text inside a ${ ... } sequence.
 //
 // It returns the token, the number of bytes consumed from s, and the source
 // position immediately after the token. startPos is the position of the
 // first byte of s.
 //
 // Leading whitespace is skipped silently, so the number of bytes consumed
 // may exceed the length of the returned token's content. If only whitespace
 // (or nothing) remains, an EOF token is returned.
 func scanInterpolationToken(s string, startPos ast.Pos) (*Token, int, ast.Pos) {
 	pos := startPos
 	skipped := 0

 	// Step over leading whitespace, tracking line/column as we go.
 	for skipped < len(s) && byteIsSpace(s[skipped]) {
 		if s[skipped] == '\n' {
 			pos.Line++
 			pos.Column = 1
 		} else {
 			pos.Column++
 		}
 		skipped++
 	}
 	rest := s[skipped:]

 	// Unexpected EOF during sequence
 	if len(rest) == 0 {
 		return &Token{Type: EOF, Content: "", Pos: pos}, skipped, pos
 	}

 	// fixed builds a token from the first n bytes of rest. It is only used
 	// for ASCII operators, so bytes and columns advance by the same amount.
 	fixed := func(kind TokenType, n int) (*Token, int, ast.Pos) {
 		after := pos
 		after.Column += n
 		return &Token{Type: kind, Content: rest[:n], Pos: pos}, skipped + n, after
 	}

 	// pair yields the two-byte operator op when rest starts with it, and
 	// otherwise a one-byte token of type single.
 	pair := func(op string, both, single TokenType) (*Token, int, ast.Pos) {
 		if len(rest) >= 2 && rest[:2] == op {
 			return fixed(both, 2)
 		}
 		return fixed(single, 1)
 	}

 	first := rest[0]
 	switch first {
 	case '(', ')', '[', ']', ',', '.', '+', '-', '*', '/', '%', '?', ':':
 		// Simple punctuation stands for itself in the TokenType enumeration.
 		return fixed(TokenType(first), 1)
 	case '}':
 		return fixed(END, 1)
 	case '"':
 		return fixed(OQUOTE, 1)
 	case '!':
 		return pair("!=", NOTEQUAL, BANG)
 	case '<':
 		return pair("<=", LTE, LT)
 	case '>':
 		return pair(">=", GTE, GT)
 	case '=':
 		// A single equals is not a valid operator
 		return pair("==", EQUAL, INVALID)
 	case '&':
 		return pair("&&", AND, INVALID)
 	case '|':
 		return pair("||", OR, INVALID)
 	}

 	if first >= '0' && first <= '9' {
 		// Numbers consist of ASCII digits and periods only, so the byte
 		// length doubles as the column width.
 		num, numType := scanNumber(rest)
 		after := pos
 		after.Column += len(num)
 		return &Token{Type: numType, Content: num, Pos: pos}, skipped + len(num), after
 	}

 	if stringStartsWithIdentifier(rest) {
 		ident, runeLen := scanIdentifier(rest)
 		kind := IDENTIFIER
 		if ident == "true" || ident == "false" {
 			kind = BOOL
 		}
 		// Identifiers may contain multi-byte characters: bytes consumed and
 		// columns advanced are tracked separately.
 		after := pos
 		after.Column += runeLen
 		return &Token{Type: kind, Content: ident, Pos: pos}, skipped + len(ident), after
 	}

 	// Anything else is invalid. Consume exactly one (possibly multi-byte)
 	// character, which occupies a single column.
 	_, width := utf8.DecodeRuneInString(rest)
 	after := pos
 	after.Column++
 	return &Token{Type: INVALID, Content: rest[:width], Pos: pos}, skipped + width, after
 }

 // scanLiteral returns the (possibly empty) prefix of s that forms a literal,
 // together with the token that terminates it.
 //
 // The terminator is one of:
 //   - BEGIN, when a "${" sequence is found;
 //   - CQUOTE, when nested is true and an unescaped '"' is found;
 //   - EOF, when the end of s is reached and nested is false;
 //   - INVALID, when the end of s is reached and nested is true (an
 //     unterminated quoted string). In that case the returned literal is
 //     empty and the INVALID token carries all of s, positioned at startPos.
 //
 // "$$" is always skipped as a pair, and inside quoted strings so are `\"`
 // and `\\`, so that none of them can terminate the literal. Escape sequences
 // are not otherwise interpreted here.
 func scanLiteral(s string, startPos ast.Pos, nested bool) (string, *Token) {
 	cur := startPos
 	i := 0

 	for i < len(s) {
 		c := s[i]
 		hasFollow := i+1 < len(s)

 		if c == '$' && hasFollow {
 			switch s[i+1] {
 			case '{':
 				return s[:i], &Token{
 					Type:    BEGIN,
 					Content: s[i : i+2],
 					Pos:     cur,
 				}
 			case '$':
 				// Double-$ escapes the special processing of $,
 				// so both characters belong to the literal.
 				cur.Column += 2
 				i += 2
 				continue
 			}
 		}

 		// Quote handling applies only inside quoted strings.
 		if nested {
 			if c == '"' {
 				return s[:i], &Token{
 					Type:    CQUOTE,
 					Content: s[i : i+1],
 					Pos:     cur,
 				}
 			}

 			// An escaped quote or escaped backslash must not end the
 			// string. The parser does the full escape processing, since
 			// it can produce better error messages than we can here.
 			if c == '\\' && hasFollow && (s[i+1] == '"' || s[i+1] == '\\') {
 				cur.Column += 2
 				i += 2
 				continue
 			}
 		}

 		if c == '\n' {
 			cur.Line++
 			cur.Column = 1
 			i++
 			continue
 		}

 		// "Column" measures runes, so step over one whole UTF-8
 		// character at a time.
 		_, width := utf8.DecodeRuneInString(s[i:])
 		cur.Column++
 		i += width
 	}

 	if nested {
 		// We've ended in the middle of a quoted string,
 		// which means this token is actually invalid.
 		return "", &Token{
 			Type:    INVALID,
 			Content: s,
 			Pos:     startPos,
 		}
 	}

 	return s[:i], &Token{
 		Type:    EOF,
 		Content: "",
 		Pos:     cur,
 	}
 }

 // scanNumber returns the prefix of s that looks like a number, along with
 // the type of number it represents: INTEGER or FLOAT.
 //
 // Only basic character analysis is done: the prefix consists of ASCII digits
 // with at most one period, and seeing a period makes the result a FLOAT.
 // Scanning stops before a second period. A period that is immediately
 // followed by a character that is neither a digit nor a period is not
 // included, and the result is then an INTEGER ("1.foo" yields "1").
 //
 // If s does not begin with a digit or a period, the result is the empty
 // string with type INTEGER. Validating the form and range of the number is
 // left to the parser.
 func scanNumber(s string) (string, TokenType) {
 	period := -1
 	byteLen := 0
 	numType := INTEGER
 	for byteLen < len(s) {
 		next := s[byteLen]
 		if next != '.' && (next < '0' || next > '9') {
 			// If our last value was a period, then we're not a float,
 			// we're just an integer that ends in a period. The period >= 0
 			// guard matters when nothing has been consumed yet: without it
 			// -1 == byteLen-1 would match and byteLen would go negative.
 			if period >= 0 && period == byteLen-1 {
 				byteLen--
 				numType = INTEGER
 			}

 			break
 		}

 		if next == '.' {
 			// If we've already seen a period, break out
 			if period >= 0 {
 				break
 			}

 			period = byteLen
 			numType = FLOAT
 		}

 		byteLen++
 	}

 	return s[:byteLen], numType
 }

 // scanIdentifier returns the prefix of s that forms an identifier, along
 // with the length of that prefix in runes.
 //
 // Identifier characters are letters, numbers and marks (as classified by
 // the unicode package), plus '_', '-', '.' and '*'. A '*' is only accepted
 // directly after a '.', and must itself be followed by a '.' to stay part
 // of the identifier: in "foo.*bar" the ".*" is dropped and "foo" is returned.
 //
 // Identifiers may contain UTF-8 encoded non-Latin letters, in which case
 // the returned rune length is shorter than the byte length of the returned
 // string.
 func scanIdentifier(s string) (string, int) {
 	byteLen := 0
 	runeLen := 0
 	for byteLen < len(s) {
 		nextRune, size := utf8.DecodeRuneInString(s[byteLen:])
 		if !(nextRune == '_' ||
 			nextRune == '-' ||
 			nextRune == '.' ||
 			nextRune == '*' ||
 			unicode.IsNumber(nextRune) ||
 			unicode.IsLetter(nextRune) ||
 			unicode.IsMark(nextRune)) {
 			break
 		}

 		// If we reach a star, it must be between periods to be part
 		// of the same identifier. A star at the very start has no
 		// preceding period (and no preceding byte to inspect).
 		if nextRune == '*' && (byteLen == 0 || s[byteLen-1] != '.') {
 			break
 		}

 		// If our previous character was a star, then the current must
 		// be period. Otherwise, undo that and exit. The star and period
 		// are one byte and one rune each, so both counters are rewound
 		// together to keep the rune length in step with the string.
 		if byteLen > 0 && s[byteLen-1] == '*' && nextRune != '.' {
 			byteLen--
 			runeLen--
 			if byteLen > 0 && s[byteLen-1] == '.' {
 				byteLen--
 				runeLen--
 			}

 			break
 		}

 		byteLen += size
 		runeLen++
 	}

 	return s[:byteLen], runeLen
 }

 // byteIsSpace reports whether b is one of the whitespace bytes skipped
 // inside interpolation sequences: space, tab, carriage return or newline.
 // This is deliberately narrower than Unicode's notion of whitespace.
 func byteIsSpace(b byte) bool {
 	return b == ' ' || b == '\t' || b == '\r' || b == '\n'
 }

 // stringStartsWithIdentifier reports whether s begins with a character that
 // may legally start an identifier: an underscore, or any character that
 // Unicode classifies as a letter. It returns false for the empty string.
 func stringStartsWithIdentifier(s string) bool {
 	if s == "" {
 		return false
 	}

 	switch c := s[0]; {
 	case c == '_', 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
 		// Cheap ASCII fast path.
 		return true
 	case !utf8.RuneStart(c):
 		// A continuation byte cannot begin a character at all.
 		return false
 	}

 	// The first byte starts a UTF-8 sequence; decode it and ask Unicode
 	// whether the resulting character is a letter.
 	r, _ := utf8.DecodeRuneInString(s)
 	return unicode.IsLetter(r)
 }
	package scanner

	import (
	"unicode"
	"unicode/utf8"

	"github.com/hashicorp/hil/ast"
	)

	// Scan starts tokenizing the given input string in a new goroutine and
	// returns the channel on which the resulting Tokens are received.
	//
	// The scanner only partitions the input into meaningful parts. It performs
	// no transformation of the raw text, so the caller is responsible for any
	// further interpretation, such as converting INTEGER tokens into real ints
	// or processing escape sequences inside LITERAL or STRING tokens.
	//
	// Token contents are slices of the original string.
	//
	// The channel is unbuffered, so the scanning goroutine only makes progress
	// while the caller keeps receiving from it.
	//
	// Pass ast.InitPos as startPos unless this interpolation string is embedded
	// in a larger file and the position of its first character within that file
	// is known.
	func Scan(s string, startPos ast.Pos) <-chan *Token {
		tokens := make(chan *Token)
		go scan(s, tokens, startPos)
		return tokens
	}

	// scan is the goroutine body started by Scan. It walks the input string,
	// sends every token it finds on ch, and closes ch when it is done.
	//
	// Scanning alternates between two modes: literal/string scanning (handled by
	// scanLiteral) and scanning of the tokens inside a ${ ... } sequence (handled
	// by scanInterpolationToken). It stops at EOF, or right after the first
	// INVALID token, in which case a synthetic EOF token is sent as well.
	//
	// pos is the source position of the first byte of s.
	func scan(s string, ch chan<- *Token, pos ast.Pos) {
		// Every exit path below is a plain return, so closing here covers
		// them all.
		defer close(ch)

		// 'remain' starts off as the whole string but we gradually
		// slice off the front of it as we work our way through.
		remain := s

		// nesting keeps track of how many ${ .. } sequences we are
		// inside, so we can recognize the minor differences in syntax
		// between outer string literals (LITERAL tokens) and quoted
		// string literals (STRING tokens).
		nesting := 0

		for {
			startPos := pos

			// Literal string processing first, since the beginning of
			// a string is always outside of an interpolation sequence.
			literalVal, terminator := scanLiteral(remain, pos, nesting > 0)

			if len(literalVal) > 0 {
				litType := LITERAL
				if nesting > 0 {
					litType = STRING
				}
				ch <- &Token{
					Type:    litType,
					Content: literalVal,
					Pos:     startPos,
				}
				remain = remain[len(literalVal):]
			}

			ch <- terminator
			remain = remain[len(terminator.Content):]

			// Advance past the terminator. Columns count runes and reset at
			// each newline. The fixed terminators ("${" and the closing quote)
			// are plain ASCII, but an INVALID terminator carries the whole
			// unterminated string, which may contain multi-byte characters and
			// newlines, so a byte count would misplace the synthetic EOF.
			pos = terminator.Pos
			for _, r := range terminator.Content {
				if r == '\n' {
					pos.Line++
					pos.Column = 1
				} else {
					pos.Column++
				}
			}

			switch terminator.Type {
			case INVALID:
				// Synthetic EOF after invalid token, since further scanning
				// is likely to just produce more garbage.
				ch <- &Token{
					Type:    EOF,
					Content: "",
					Pos:     pos,
				}
				return
			case EOF:
				// All done!
				return
			case BEGIN:
				nesting++
			case CQUOTE:
				// nothing special to do
			default:
				// Should never happen
				panic("invalid string/literal terminator")
			}

			// Now we do the processing of the insides of ${ .. } sequences.
			// This loop terminates when we encounter either a closing } or
			// an opening ", which will cause us to return to literal processing.
		Interpolation:
			for {
				token, size, newPos := scanInterpolationToken(remain, pos)
				ch <- token
				remain = remain[size:]
				pos = newPos

				switch token.Type {
				case INVALID:
					// Synthetic EOF after invalid token, since further scanning
					// is likely to just produce more garbage.
					ch <- &Token{
						Type:    EOF,
						Content: "",
						Pos:     pos,
					}
					return
				case EOF:
					// All done
					// (though a syntax error that we'll catch in the parser)
					return
				case END:
					nesting--
					if nesting < 0 {
						// Can happen if there are unbalanced ${ and } sequences
						// in the input, which we'll catch in the parser.
						nesting = 0
					}
					break Interpolation
				case OQUOTE:
					// Beginning of nested quoted string
					break Interpolation
				}
			}
		}
	}

	// Returns the token found at the start of the given string, followed by
	// the number of bytes that were consumed from the string and the adjusted
	// source position.
	//
	// Note that the number of bytes consumed can be more than the length of
	// the returned token contents if the string begins with whitespace, since
	// it will be silently consumed before reading the token.
	func scanInterpolationToken(s string, startPos ast.Pos) (*Token, int, ast.Pos) {
	pos := startPos
	size := 0

	// Consume whitespace, if any
	for len(s) > 0 && byteIsSpace(s[0]) {
	if s[0] == '\n' {
	pos.Column = 1
	pos.Line++
	} else {
	pos.Column++
	}
	size++
	s = s[1:]
	}

	// Unexpected EOF during sequence
	if len(s) == 0 {
	return &Token{
	Type: EOF,
	Content: "",
	Pos: pos,
	}, size, pos
	}

	next := s[0]
	var token *Token

	switch next {
	case '(', ')', '[', ']', ',', '.', '+', '-', '*', '/', '%', '?', ':':
	// Easy punctuation symbols that don't have any special meaning
	// during scanning, and that stand for themselves in the
	// TokenType enumeration.
	token = &Token{
	Type: TokenType(next),
	Content: s[:1],
	Pos: pos,
	}
	case '}':
	token = &Token{
	Type: END,
	Content: s[:1],
	Pos: pos,
	}
	case '"':
	token = &Token{
	Type: OQUOTE,
	Content: s[:1],
	Pos: pos,
	}
	case '!':
	if len(s) >= 2 && s[:2] == "!=" {
	token = &Token{
	Type: NOTEQUAL,
	Content: s[:2],
	Pos: pos,
	}
	} else {
	token = &Token{
	Type: BANG,
	Content: s[:1],
	Pos: pos,
	}
	}
	case '<':
	if len(s) >= 2 && s[:2] == "<=" {
	token = &Token{
	Type: LTE,
	Content: s[:2],
	Pos: pos,
	}
	} else {
	token = &Token{
	Type: LT,
	Content: s[:1],
	Pos: pos,
	}
	}
	case '>':
	if len(s) >= 2 && s[:2] == ">=" {
	token = &Token{
	Type: GTE,
	Content: s[:2],
	Pos: pos,
	}
	} else {
	token = &Token{
	Type: GT,
	Content: s[:1],
	Pos: pos,
	}
	}
	case '=':
	if len(s) >= 2 && s[:2] == "==" {
	token = &Token{
	Type: EQUAL,
	Content: s[:2],
	Pos: pos,
	}
	} else {
	// A single equals is not a valid operator
	token = &Token{
	Type: INVALID,
	Content: s[:1],
	Pos: pos,
	}
	}
	case '&':
	if len(s) >= 2 && s[:2] == "&&" {
	token = &Token{
	Type: AND,
	Content: s[:2],
	Pos: pos,
	}
	} else {
	token = &Token{
	Type: INVALID,
	Content: s[:1],
	Pos: pos,
	}
	}
	case '\|':
	if len(s) >= 2 && s[:2] == "\|\|" {
	token = &Token{
	Type: OR,
	Content: s[:2],
	Pos: pos,
	}
	} else {
	token = &Token{
	Type: INVALID,
	Content: s[:1],
	Pos: pos,
	}
	}
	default:
	if next >= '0' && next <= '9' {
	num, numType := scanNumber(s)
	token = &Token{
	Type: numType,
	Content: num,
	Pos: pos,
	}
	} else if stringStartsWithIdentifier(s) {
	ident, runeLen := scanIdentifier(s)
	tokenType := IDENTIFIER
	if ident == "true" \|\| ident == "false" {
	tokenType = BOOL
	}
	token = &Token{
	Type: tokenType,
	Content: ident,
	Pos: pos,
	}
	// Skip usual token handling because it doesn't
	// know how to deal with UTF-8 sequences.
	pos.Column = pos.Column + runeLen
	return token, size + len(ident), pos
	} else {
	_, byteLen := utf8.DecodeRuneInString(s)
	token = &Token{
	Type: INVALID,
	Content: s[:byteLen],
	Pos: pos,
	}
	// Skip usual token handling because it doesn't
	// know how to deal with UTF-8 sequences.
	pos.Column = pos.Column + 1
	return token, size + byteLen, pos
	}
	}

	// Here we assume that the token content contains no UTF-8 sequences,
	// because we dealt with UTF-8 characters as a special case where
	// necessary above.
	size = size + len(token.Content)
	pos.Column = pos.Column + len(token.Content)

	return token, size, pos
	}

	// scanLiteral returns the (possibly empty) prefix of s that forms a literal,
	// together with the token that terminates it.
	//
	// The terminator is one of:
	//   - BEGIN, when a "${" sequence is found;
	//   - CQUOTE, when nested is true and an unescaped '"' is found;
	//   - EOF, when the end of s is reached and nested is false;
	//   - INVALID, when the end of s is reached and nested is true (an
	//     unterminated quoted string). In that case the returned literal is
	//     empty and the INVALID token carries all of s, positioned at startPos.
	//
	// "$$" is always skipped as a pair, and inside quoted strings so are `\"`
	// and `\\`, so that none of them can terminate the literal. Escape sequences
	// are not otherwise interpreted here.
	func scanLiteral(s string, startPos ast.Pos, nested bool) (string, *Token) {
		cur := startPos
		i := 0

		for i < len(s) {
			c := s[i]
			hasFollow := i+1 < len(s)

			if c == '$' && hasFollow {
				switch s[i+1] {
				case '{':
					return s[:i], &Token{
						Type:    BEGIN,
						Content: s[i : i+2],
						Pos:     cur,
					}
				case '$':
					// Double-$ escapes the special processing of $,
					// so both characters belong to the literal.
					cur.Column += 2
					i += 2
					continue
				}
			}

			// Quote handling applies only inside quoted strings.
			if nested {
				if c == '"' {
					return s[:i], &Token{
						Type:    CQUOTE,
						Content: s[i : i+1],
						Pos:     cur,
					}
				}

				// An escaped quote or escaped backslash must not end the
				// string. The parser does the full escape processing, since
				// it can produce better error messages than we can here.
				if c == '\\' && hasFollow && (s[i+1] == '"' || s[i+1] == '\\') {
					cur.Column += 2
					i += 2
					continue
				}
			}

			if c == '\n' {
				cur.Line++
				cur.Column = 1
				i++
				continue
			}

			// "Column" measures runes, so step over one whole UTF-8
			// character at a time.
			_, width := utf8.DecodeRuneInString(s[i:])
			cur.Column++
			i += width
		}

		if nested {
			// We've ended in the middle of a quoted string,
			// which means this token is actually invalid.
			return "", &Token{
				Type:    INVALID,
				Content: s,
				Pos:     startPos,
			}
		}

		return s[:i], &Token{
			Type:    EOF,
			Content: "",
			Pos:     cur,
		}
	}

	// scanNumber returns the extent of the prefix of the string that represents
	// a valid number, along with what type of number it represents: INT or FLOAT.
	//
	// scanNumber does only basic character analysis: numbers consist of digits
	// and periods, with at least one period signalling a FLOAT. It's the parser's
	// responsibility to validate the form and range of the number, such as ensuring
	// that a FLOAT actually contains only one period, etc.
	func scanNumber(s string) (string, TokenType) {
	period := -1
	byteLen := 0
	numType := INTEGER
	for {
	if byteLen >= len(s) {
	break
	}

	next := s[byteLen]
	if next != '.' && (next < '0' \|\| next > '9') {
	// If our last value was a period, then we're not a float,
	// we're just an integer that ends in a period.
	if period == byteLen-1 {
	byteLen--
	numType = INTEGER
	}

	break
	}

	if next == '.' {
	// If we've already seen a period, break out
	if period >= 0 {
	break
	}

	period = byteLen
	numType = FLOAT
	}

	byteLen++
	}

	return s[:byteLen], numType
	}

	// scanIdentifier returns the prefix of s that forms an identifier, along
	// with the length of that prefix in runes.
	//
	// Identifier characters are letters, numbers and marks (as classified by
	// the unicode package), plus '_', '-', '.' and '*'. A '*' is only accepted
	// directly after a '.', and must itself be followed by a '.' to stay part
	// of the identifier: in "foo.*bar" the ".*" is dropped and "foo" is returned.
	//
	// Identifiers may contain UTF-8 encoded non-Latin letters, in which case
	// the returned rune length is shorter than the byte length of the returned
	// string.
	func scanIdentifier(s string) (string, int) {
		byteLen := 0
		runeLen := 0
		for byteLen < len(s) {
			nextRune, size := utf8.DecodeRuneInString(s[byteLen:])
			if !(nextRune == '_' ||
				nextRune == '-' ||
				nextRune == '.' ||
				nextRune == '*' ||
				unicode.IsNumber(nextRune) ||
				unicode.IsLetter(nextRune) ||
				unicode.IsMark(nextRune)) {
				break
			}

			// If we reach a star, it must be between periods to be part
			// of the same identifier. A star at the very start has no
			// preceding period (and no preceding byte to inspect).
			if nextRune == '*' && (byteLen == 0 || s[byteLen-1] != '.') {
				break
			}

			// If our previous character was a star, then the current must
			// be period. Otherwise, undo that and exit. The star and period
			// are one byte and one rune each, so both counters are rewound
			// together to keep the rune length in step with the string.
			if byteLen > 0 && s[byteLen-1] == '*' && nextRune != '.' {
				byteLen--
				runeLen--
				if byteLen > 0 && s[byteLen-1] == '.' {
					byteLen--
					runeLen--
				}

				break
			}

			byteLen += size
			runeLen++
		}

		return s[:byteLen], runeLen
	}

	// byteIsSpace reports whether b is one of the whitespace bytes skipped
	// inside interpolation sequences: space, tab, carriage return or newline.
	// This is deliberately narrower than Unicode's notion of whitespace.
	func byteIsSpace(b byte) bool {
		return b == ' ' || b == '\t' || b == '\r' || b == '\n'
	}

	// stringStartsWithIdentifier reports whether s begins with a character that
	// may legally start an identifier: an underscore, or any character that
	// Unicode classifies as a letter. It returns false for the empty string.
	func stringStartsWithIdentifier(s string) bool {
		if s == "" {
			return false
		}

		switch c := s[0]; {
		case c == '_', 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
			// Cheap ASCII fast path.
			return true
		case !utf8.RuneStart(c):
			// A continuation byte cannot begin a character at all.
			return false
		}

		// The first byte starts a UTF-8 sequence; decode it and ask Unicode
		// whether the resulting character is a letter.
		r, _ := utf8.DecodeRuneInString(s)
		return unicode.IsLetter(r)
	}