/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "WritableToken.h"
#include "Lexer.h"
#include "RuleContext.h"
#include "misc/Interval.h"
#include "Exceptions.h"
#include "support/CPPUtils.h"
#include "BufferedTokenStream.h"
using namespace antlr4;
using namespace antlrcpp;
BufferedTokenStream::BufferedTokenStream(TokenSource *tokenSource) : _tokenSource(tokenSource) {
InitializeInstanceFields();
}
TokenSource* BufferedTokenStream::getTokenSource() const {
return _tokenSource;
}
size_t BufferedTokenStream::index() {
return _p;
}
ssize_t BufferedTokenStream::mark() {
return 0;
}
void BufferedTokenStream::release(ssize_t /*marker*/) {
// no resources to release
}
void BufferedTokenStream::reset() {
seek(0);
}
void BufferedTokenStream::seek(size_t index) {
lazyInit();
_p = adjustSeekIndex(index);
}
size_t BufferedTokenStream::size() {
return _tokens.size();
}
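/**
 * Advance the input cursor by one token. Throws IllegalStateException if the
 * cursor already sits on EOF, since EOF itself can never be consumed.
 */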
void BufferedTokenStream::consume() {
bool skipEofCheck = false;
if (!_needSetup) {
if (_fetchedEOF) {
// the last token in tokens is EOF. skip check if p indexes any
// fetched token except the last.
skipEofCheck = _p < _tokens.size() - 1;
} else {
// no EOF token in tokens. skip check if p indexes a fetched token.
skipEofCheck = _p < _tokens.size();
}
} else {
// not yet initialized
skipEofCheck = false;
}
if (!skipEofCheck && LA(1) == Token::EOF) {
throw IllegalStateException("cannot consume EOF");
}
if (sync(_p + 1)) {
_p = adjustSeekIndex(_p + 1);
}
}
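/**
 * Make sure index i in _tokens has a token. Returns true if a token is
 * located at index i, false if EOF was reached before enough tokens could
 * be fetched.
 */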
bool BufferedTokenStream::sync(size_t i) {
if (i + 1 < _tokens.size())
return true;
size_t n = i - _tokens.size() + 1; // how many more elements do we need?
if (n > 0) {
size_t fetched = fetch(n);
return fetched >= n;
}
return true;
}
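/**
 * Pull up to n tokens from the token source into the buffer. Returns the
 * number actually fetched, which is less than n when the source hits EOF
 * first.
 */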
size_t BufferedTokenStream::fetch(size_t n) {
if (_fetchedEOF) {
return 0;
}
size_t i = 0;
while (i < n) {
std::unique_ptr<Token> t(_tokenSource->nextToken());
if (is<WritableToken *>(t.get())) {
(static_cast<WritableToken *>(t.get()))->setTokenIndex(_tokens.size());
}
_tokens.push_back(std::move(t));
++i;
if (_tokens.back()->getType() == Token::EOF) {
_fetchedEOF = true;
break;
}
}
return i;
}
Token* BufferedTokenStream::get(size_t i) const {
if (i >= _tokens.size()) {
throw IndexOutOfBoundsException(std::string("token index ") +
std::to_string(i) +
std::string(" out of range 0..") +
std::to_string(_tokens.size() - 1));
}
return _tokens[i].get();
}
std::vector<Token *> BufferedTokenStream::get(size_t start, size_t stop) {
std::vector<Token *> subset;
lazyInit();
if (_tokens.empty()) {
return subset;
}
if (stop >= _tokens.size()) {
stop = _tokens.size() - 1;
}
for (size_t i = start; i <= stop; i++) {
Token *t = _tokens[i].get();
if (t->getType() == Token::EOF) {
break;
}
subset.push_back(t);
}
return subset;
}
size_t BufferedTokenStream::LA(ssize_t i) {
return LT(i)->getType();
}
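/**
 * Look back k tokens (k > 0) from the current position. Returns nullptr if
 * that would reach before the start of the buffer.
 */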
Token* BufferedTokenStream::LB(size_t k) {
if (k > _p) {
return nullptr;
}
return _tokens[_p - k].get();
}
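/**
 * Lookahead: LT(1) is the token about to be consumed, LT(-1) the token most
 * recently consumed. LT(0) is undefined and yields nullptr.
 */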
Token* BufferedTokenStream::LT(ssize_t k) {
lazyInit();
if (k == 0) {
return nullptr;
}
if (k < 0) {
return LB(-k);
}
size_t i = _p + k - 1;
sync(i);
if (i >= _tokens.size()) { // return EOF token
// EOF must be last token
return _tokens.back().get();
}
return _tokens[i].get();
}
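/**
 * Allow subclasses to map a seek index onto a valid token index. This base
 * stream returns i unchanged; CommonTokenStream overrides it to skip ahead
 * to the next on-channel token.
 */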
ssize_t BufferedTokenStream::adjustSeekIndex(size_t i) {
return i;
}
void BufferedTokenStream::lazyInit() {
if (_needSetup) {
setup();
}
}
void BufferedTokenStream::setup() {
_needSetup = false;
sync(0);
_p = adjustSeekIndex(0);
}
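/**
 * Install a new token source and reset the stream; previously buffered
 * tokens are discarded and the stream is set up again on first access.
 */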
void BufferedTokenStream::setTokenSource(TokenSource *tokenSource) {
_tokenSource = tokenSource;
_tokens.clear();
_fetchedEOF = false;
_needSetup = true;
}
std::vector<Token *> BufferedTokenStream::getTokens() {
std::vector<Token *> result;
for (auto &t : _tokens)
result.push_back(t.get());
return result;
}
std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop) {
return getTokens(start, stop, std::vector<size_t>());
}
std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, const std::vector<size_t> &types) {
lazyInit();
if (stop >= _tokens.size() || start >= _tokens.size()) {
throw IndexOutOfBoundsException(std::string("start ") +
std::to_string(start) +
std::string(" or stop ") +
std::to_string(stop) +
std::string(" not in 0..") +
std::to_string(_tokens.size() - 1));
}
std::vector<Token *> filteredTokens;
if (start > stop) {
return filteredTokens;
}
for (size_t i = start; i <= stop; i++) {
Token *tok = _tokens[i].get();
if (types.empty() || std::find(types.begin(), types.end(), tok->getType()) != types.end()) {
filteredTokens.push_back(tok);
}
}
return filteredTokens;
}
std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, size_t ttype) {
std::vector<size_t> s;
s.push_back(ttype);
return getTokens(start, stop, s);
}
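/**
 * Starting at i, scan forward to the next token on the given channel.
 * Stops at EOF, which is treated as being on every channel.
 */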
ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, size_t channel) {
sync(i);
if (i >= size()) {
return size() - 1;
}
Token *token = _tokens[i].get();
while (token->getChannel() != channel) {
if (token->getType() == Token::EOF) {
return i;
}
i++;
sync(i);
token = _tokens[i].get();
}
return i;
}
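/**
 * Starting at i, scan backwards to the previous token on the given channel.
 * Returns -1 if no such token exists before the start of the buffer.
 */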
ssize_t BufferedTokenStream::previousTokenOnChannel(size_t i, size_t channel) {
sync(i);
if (i >= size()) {
// the EOF token is on every channel
return size() - 1;
}
while (true) {
Token *token = _tokens[i].get();
if (token->getType() == Token::EOF || token->getChannel() == channel) {
return i;
}
if (i == 0)
return -1;
i--;
}
}
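/**
 * Collect the off-channel tokens to the right of tokenIndex, up to (but not
 * including) the next on-channel token. channel == -1 means any non-default
 * channel.
 */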
std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex, ssize_t channel) {
lazyInit();
if (tokenIndex >= _tokens.size()) {
throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1));
}
ssize_t nextOnChannel = nextTokenOnChannel(tokenIndex + 1, Lexer::DEFAULT_TOKEN_CHANNEL);
size_t to;
size_t from = tokenIndex + 1;
// if no on-channel token to the right, nextOnChannel == -1, so use the last token
if (nextOnChannel == -1) {
to = size() - 1;
} else {
to = static_cast<size_t>(nextOnChannel);
}
return filterForChannel(from, to, channel);
}
std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex) {
return getHiddenTokensToRight(tokenIndex, -1);
}
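/**
 * Collect the off-channel tokens to the left of tokenIndex, back to (but not
 * including) the previous on-channel token. channel == -1 means any
 * non-default channel.
 */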
std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel) {
lazyInit();
if (tokenIndex >= _tokens.size()) {
throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1));
}
if (tokenIndex == 0) {
// Obviously no tokens can appear before the first token.
return { };
}
ssize_t prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL);
if (prevOnChannel == static_cast<ssize_t>(tokenIndex - 1)) {
return { };
}
// if no on-channel token to the left, prevOnChannel == -1, so from becomes 0
size_t from = static_cast<size_t>(prevOnChannel + 1);
size_t to = tokenIndex - 1;
return filterForChannel(from, to, channel);
}
std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex) {
return getHiddenTokensToLeft(tokenIndex, -1);
}
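/**
 * Return the tokens in [from, to] that are on the requested channel, or all
 * tokens off the default channel when channel == -1.
 */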
std::vector<Token *> BufferedTokenStream::filterForChannel(size_t from, size_t to, ssize_t channel) {
std::vector<Token *> hidden;
for (size_t i = from; i <= to; i++) {
Token *t = _tokens[i].get();
if (channel == -1) {
if (t->getChannel() != Lexer::DEFAULT_TOKEN_CHANNEL) {
hidden.push_back(t);
}
} else {
if (t->getChannel() == static_cast<size_t>(channel)) {
hidden.push_back(t);
}
}
}
return hidden;
}
bool BufferedTokenStream::isInitialized() const {
return !_needSetup;
}
/**
 * Get the name of the underlying token source.
 */
std::string BufferedTokenStream::getSourceName() const {
return _tokenSource->getSourceName();
}
/**
 * Get the text of all tokens in this buffer.
 */
std::string BufferedTokenStream::getText() {
fill();
return getText(misc::Interval(0U, size() - 1));
}
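/**
 * Concatenate the text of the tokens in the given index interval, stopping
 * early at EOF. The interval is clamped to the buffered token range.
 */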
std::string BufferedTokenStream::getText(const misc::Interval &interval) {
lazyInit();
size_t start = interval.a;
size_t stop = interval.b;
if (start == INVALID_INDEX || stop == INVALID_INDEX) {
return "";
}
sync(stop);
if (stop >= _tokens.size()) {
stop = _tokens.size() - 1;
}
std::stringstream ss;
for (size_t i = start; i <= stop; i++) {
Token *t = _tokens[i].get();
if (t->getType() == Token::EOF) {
break;
}
ss << t->getText();
}
return ss.str();
}
std::string BufferedTokenStream::getText(RuleContext *ctx) {
return getText(ctx->getSourceInterval());
}
std::string BufferedTokenStream::getText(Token *start, Token *stop) {
if (start != nullptr && stop != nullptr) {
return getText(misc::Interval(start->getTokenIndex(), stop->getTokenIndex()));
}
return "";
}
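/**
 * Eagerly pull all remaining tokens from the token source into the buffer,
 * fetching in blocks of 1000 until EOF is reached.
 */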
void BufferedTokenStream::fill() {
lazyInit();
const size_t blockSize = 1000;
while (true) {
size_t fetched = fetch(blockSize);
if (fetched < blockSize) {
return;
}
}
}
void BufferedTokenStream::InitializeInstanceFields() {
_needSetup = true;
_fetchedEOF = false;
}
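// A minimal usage sketch. MyLexer stands in for any ANTLR-generated lexer and
// is not part of this file; the surrounding names are from the antlr4 runtime:
//
//   ANTLRInputStream input("some input text");
//   MyLexer lexer(&input);
//   BufferedTokenStream tokens(&lexer);
//   tokens.fill();                          // buffer everything up to EOF
//   for (Token *token : tokens.getTokens())
//     std::cout << token->toString() << std::endl;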