| // Licensed to the Apache Software Foundation (ASF) under one or more |
| // contributor license agreements. See the NOTICE file distributed with |
| // this work for additional information regarding copyright ownership. |
| // The ASF licenses this file to You under the Apache License, Version 2.0 |
| // (the "License"); you may not use this file except in compliance with |
| // the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package lexer |
| |
| import ( |
| "errors" |
| "strings" |
| "unicode/utf8" |
| ) |
| |
// StateFunc is one state of the lexer's state machine: it consumes
// input from the lexer and returns the next state to run, or nil to
// stop lexing.
type StateFunc func(*L) StateFunc

// TokenType identifies the kind of a Token; concrete values beyond
// EmptyToken are defined by the grammar that uses this lexer.
type TokenType int

const (
	// EOFRune is returned by Next when the source is exhausted.
	EOFRune rune = -1
	// EmptyToken is the zero TokenType.
	EmptyToken TokenType = 0
)
| |
// Token is a single lexeme: a type tag plus the slice of source text
// it was built from.
type Token struct {
	Type TokenType
	Value string
}
| |
// L holds the state of a running lexer. It tracks the current token
// span as source[start:position] and delivers completed tokens on the
// tokens channel.
type L struct {
	source string // full input being tokenized
	start, position int // current token is source[start:position]
	startState StateFunc // initial state executed by run
	Err error // most recent error reported through Error, if any
	tokens chan Token // buffered channel of emitted tokens
	ErrorHandler func(e string) // optional; when nil, Error panics
	rewind runeStack // runes read since the last emit/ignore, for Rewind
}
| |
| // New creates a returns a lexer ready to parse the given source code. |
| func New(src string, start StateFunc) *L { |
| return &L{ |
| source: src, |
| startState: start, |
| start: 0, |
| position: 0, |
| rewind: newRuneStack(), |
| } |
| } |
| |
| // Start begins executing the Lexer in an asynchronous manner (using a goroutine). |
| func (l *L) Start() { |
| // Take half the string length as a buffer size. |
| buffSize := len(l.source) / 2 |
| if buffSize <= 0 { |
| buffSize = 1 |
| } |
| l.tokens = make(chan Token, buffSize) |
| go l.run() |
| } |
| |
| func (l *L) StartSync() { |
| // Take half the string length as a buffer size. |
| buffSize := len(l.source) / 2 |
| if buffSize <= 0 { |
| buffSize = 1 |
| } |
| l.tokens = make(chan Token, buffSize) |
| l.run() |
| } |
| |
| // Current returns the value being being analyzed at this moment. |
| func (l *L) Current() string { |
| return l.source[l.start:l.position] |
| } |
| |
| // Emit will receive a token type and push a new token with the current analyzed |
| // value into the tokens channel. |
| func (l *L) Emit(t TokenType) { |
| tok := Token{ |
| Type: t, |
| Value: l.Current(), |
| } |
| l.tokens <- tok |
| l.start = l.position |
| l.rewind.clear() |
| } |
| |
| // Ignore clears the rewind stack and then sets the current beginning position |
| // to the current position in the source which effectively ignores the section |
| // of the source being analyzed. |
| func (l *L) Ignore() { |
| l.rewind.clear() |
| l.start = l.position |
| } |
| |
| // Peek performs a Next operation immediately followed by a Rewind returning the |
| // peeked rune. |
| func (l *L) Peek() rune { |
| r := l.Next() |
| l.Rewind() |
| |
| return r |
| } |
| |
| // Rewind will take the last rune read (if any) and rewind back. Rewinds can |
| // occur more than once per call to Next but you can never rewind past the |
| // last point a token was emitted. |
| func (l *L) Rewind() { |
| r := l.rewind.pop() |
| if r > EOFRune { |
| size := utf8.RuneLen(r) |
| l.position -= size |
| if l.position < l.start { |
| l.position = l.start |
| } |
| } |
| } |
| |
| // Next pulls the next rune from the Lexer and returns it, moving the position |
| // forward in the source. |
| func (l *L) Next() rune { |
| var ( |
| r rune |
| s int |
| ) |
| str := l.source[l.position:] |
| if len(str) == 0 { |
| r, s = EOFRune, 0 |
| } else { |
| r, s = utf8.DecodeRuneInString(str) |
| } |
| l.position += s |
| l.rewind.push(r) |
| |
| return r |
| } |
| |
| // Take receives a string containing all acceptable strings and will contine |
| // over each consecutive character in the source until a token not in the given |
| // string is encountered. This should be used to quickly pull token parts. |
| func (l *L) Take(chars string) { |
| r := l.Next() |
| for strings.ContainsRune(chars, r) { |
| r = l.Next() |
| } |
| l.Rewind() // last next wasn't a match |
| } |
| |
| // NextToken returns the next token from the lexer and a value to denote whether |
| // or not the token is finished. |
| func (l *L) NextToken() (*Token, bool) { |
| if tok, ok := <-l.tokens; ok { |
| return &tok, false |
| } else { |
| return nil, true |
| } |
| } |
| |
| // Partial yyLexer implementation |
| |
| func (l *L) Error(e string) { |
| if l.ErrorHandler != nil { |
| l.Err = errors.New(e) |
| l.ErrorHandler(e) |
| } else { |
| panic(e) |
| } |
| } |
| |
| // Private methods |
| |
| func (l *L) run() { |
| state := l.startState |
| for state != nil { |
| state = state(l) |
| } |
| close(l.tokens) |
| } |