// blob: b5cfcc12b221f87ef21f8e81038e5ca46241f911 [file] [log] [blame]
#include <cassert>
#include <memory>
#include "exp.h"
#include "scanner.h"
#include "token.h"
#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
namespace YAML {
// Constructs a scanner that reads from `in`.
// All state flags start out cleared: the stream is neither started nor ended,
// no simple key is allowed yet, and the JSON-flow hint is off.
Scanner::Scanner(std::istream& in)
: INPUT(in),
m_startedStream(false),
m_endedStream(false),
m_simpleKeyAllowed(false),
m_canBeJSONFlow(false) {}
// Nothing to release by hand; the members clean up after themselves.
Scanner::~Scanner() = default;
// empty
// . Returns true when no token is left in the queue, after first giving the
//   scanner a chance to produce more.
bool Scanner::empty() {
  EnsureTokensInQueue();
  const bool noTokensLeft = m_tokens.empty();
  return noTokensLeft;
}
// pop
// . Discards the token at the front of the queue.
// . Does nothing when the queue is still empty after refilling.
void Scanner::pop() {
  EnsureTokensInQueue();
  if (m_tokens.empty()) {
    return;
  }
  m_tokens.pop();
}
// peek
// . Returns a reference to the token at the front of the queue without
//   removing it.
// . The queue must not be empty; this is only checked by an assert, so
//   callers should test empty() first.
Token& Scanner::peek() {
  EnsureTokensInQueue();

  // should we be asserting here? I mean, we should really just be checking
  // whether it's empty before peeking.
  assert(!m_tokens.empty());

  Token& next = m_tokens.front();

#if 0
  static Token *pLast = 0;
  if(pLast != &next)
    std::cerr << "peek: " << next << "\n";
  pLast = &next;
#endif

  return next;
}
// mark
// . Returns the mark reported by the input stream.
Mark Scanner::mark() const {
  return INPUT.mark();
}
void Scanner::EnsureTokensInQueue() {
while (1) {
if (!m_tokens.empty()) {
Token& token = m_tokens.front();
// if this guy's valid, then we're done
if (token.status == Token::VALID) {
return;
}
// here's where we clean up the impossible tokens
if (token.status == Token::INVALID) {
m_tokens.pop();
continue;
}
// note: what's left are the unverified tokens
}
// no token? maybe we've actually finished
if (m_endedStream) {
return;
}
// no? then scan...
ScanNextToken();
}
}
void Scanner::ScanNextToken() {
if (m_endedStream) {
return;
}
if (!m_startedStream) {
return StartStream();
}
// get rid of whitespace, etc. (in between tokens it should be irrelevent)
ScanToNextToken();
// maybe need to end some blocks
PopIndentToHere();
// *****
// And now branch based on the next few characters!
// *****
// end of stream
if (!INPUT) {
return EndStream();
}
if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) {
return ScanDirective();
}
// document token
if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
return ScanDocStart();
}
if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
return ScanDocEnd();
}
// flow start/end/entry
if (INPUT.peek() == Keys::FlowSeqStart ||
INPUT.peek() == Keys::FlowMapStart) {
return ScanFlowStart();
}
if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) {
return ScanFlowEnd();
}
if (INPUT.peek() == Keys::FlowEntry) {
return ScanFlowEntry();
}
// block/map stuff
if (Exp::BlockEntry().Matches(INPUT)) {
return ScanBlockEntry();
}
if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
return ScanKey();
}
if (GetValueRegex().Matches(INPUT)) {
return ScanValue();
}
// alias/anchor
if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) {
return ScanAnchorOrAlias();
}
// tag
if (INPUT.peek() == Keys::Tag) {
return ScanTag();
}
// special scalars
if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
INPUT.peek() == Keys::FoldedScalar)) {
return ScanBlockScalar();
}
if (INPUT.peek() == '\'' || INPUT.peek() == '\"') {
return ScanQuotedScalar();
}
// plain scalars
if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
.Matches(INPUT)) {
return ScanPlainScalar();
}
// don't know what it is!
throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
}
// ScanToNextToken
// . Consumes whitespace, comments and line breaks until the input is
//   positioned at something else.
// . Each line break invalidates the pending simple key and, in block
//   context, allows a new one.
void Scanner::ScanToNextToken() {
  for (;;) {
    // step 1: spaces and tabs
    while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
      // a tab seen in block context rules out a simple key
      if (InBlockContext() && Exp::Tab().Matches(INPUT)) {
        m_simpleKeyAllowed = false;
      }
      INPUT.eat(1);
    }

    // step 2: a comment runs up to (but not including) the line break
    if (Exp::Comment().Matches(INPUT)) {
      while (INPUT && !Exp::Break().Matches(INPUT)) {
        INPUT.eat(1);
      }
    }

    // step 3: anything but a line break ends the skipping
    if (!Exp::Break().Matches(INPUT)) {
      return;
    }

    // swallow the line break and go around again
    const int breakLength = Exp::Break().Match(INPUT);
    INPUT.eat(breakLength);

    // the simple key cannot continue past the end of the line
    InvalidateSimpleKey();

    // new line - we may be able to accept a simple key now
    if (InBlockContext()) {
      m_simpleKeyAllowed = true;
    }
  }
}
///////////////////////////////////////////////////////////////////////
// Misc. helpers
// IsWhitespaceToBeEaten
// . We can eat whitespace if it's a space or tab
// . Note: originally tabs in block context couldn't be eaten
// "where a simple key could be allowed
// (i.e., not at the beginning of a line, or following '-', '?', or
// ':')"
// I think this is wrong, since tabs can be non-content whitespace; it's just
// that they can't contribute to indentation, so once you've seen a tab in a
// line, you can't start a simple key
bool Scanner::IsWhitespaceToBeEaten(char ch) {
  // only spaces and tabs count as skippable whitespace
  return ch == ' ' || ch == '\t';
}
// GetValueRegex
// . Picks the regex used to recognize a value indicator: the block version
//   in block context, otherwise one of the two flow versions depending on
//   m_canBeJSONFlow.
const RegEx& Scanner::GetValueRegex() const {
  if (InBlockContext()) {
    return Exp::Value();
  }

  if (m_canBeJSONFlow) {
    return Exp::ValueInJSONFlow();
  }

  return Exp::ValueInFlow();
}
void Scanner::StartStream() {
m_startedStream = true;
m_simpleKeyAllowed = true;
std::unique_ptr<IndentMarker> pIndent(
new IndentMarker(-1, IndentMarker::NONE));
m_indentRefs.push_back(std::move(pIndent));
m_indents.push(&m_indentRefs.back());
}
// EndStream
// . Finishes scanning: resets a non-zero column, pops all remaining indents
//   and simple keys, and flags the stream as ended.
void Scanner::EndStream() {
  // force newline
  const bool midLine = INPUT.column() > 0;
  if (midLine) {
    INPUT.ResetColumn();
  }

  PopAllIndents();
  PopAllSimpleKeys();

  m_endedStream = true;
  m_simpleKeyAllowed = false;
}
// PushToken
// . Appends a token of the given type, marked with the current input
//   position, and returns a pointer to the queued token.
Token* Scanner::PushToken(Token::TYPE type) {
  const Token token(type, INPUT.mark());
  m_tokens.push(token);

  Token& queued = m_tokens.back();
  return &queued;
}
// GetStartTokenFor
// . Maps a block indent type to the token type that opens it.
// . NONE has no start token: that is a programming error, caught by an
//   assert and otherwise reported with std::runtime_error.
Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
  switch (type) {
    case IndentMarker::SEQ:
      return Token::BLOCK_SEQ_START;
    case IndentMarker::MAP:
      return Token::BLOCK_MAP_START;
    case IndentMarker::NONE:
      break;
  }

  // reached for NONE and for any value outside the enumeration
  assert(false);
  throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
}
// PushIndentTo
// . Pushes an indentation onto the stack, and enqueues the proper start
//   token, if `column` really opens a new block.
// . Returns the pushed marker, or nullptr when nothing was pushed: in flow
//   context, when the column is less than the current indent, or when it is
//   equal to it (except for a sequence starting at the column of its
//   enclosing map).
Scanner::IndentMarker* Scanner::PushIndentTo(int column,
                                             IndentMarker::INDENT_TYPE type) {
  // are we in flow?
  if (InFlowContext()) {
    return nullptr;
  }

  std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
  IndentMarker& indent = *pIndent;
  const IndentMarker& lastIndent = *m_indents.top();

  // is this actually an indentation?
  if (indent.column < lastIndent.column) {
    return nullptr;
  }
  if (indent.column == lastIndent.column &&
      !(indent.type == IndentMarker::SEQ &&
        lastIndent.type == IndentMarker::MAP)) {
    return nullptr;
  }

  // push a start token
  indent.pStartToken = PushToken(GetStartTokenFor(type));

  // Transfer ownership before publishing the raw pointer: if storing the
  // marker throws, m_indents must not be left holding a pointer to a marker
  // that pIndent destroys during unwinding. `indent` stays valid because the
  // marker itself does not move when its owner does.
  m_indentRefs.push_back(std::move(pIndent));
  m_indents.push(&indent);
  return &m_indentRefs.back();
}
void Scanner::PopIndentToHere() {
// are we in flow?
if (InFlowContext()) {
return;
}
// now pop away
while (!m_indents.empty()) {
const IndentMarker& indent = *m_indents.top();
if (indent.column < INPUT.column()) {
break;
}
if (indent.column == INPUT.column() &&
!(indent.type == IndentMarker::SEQ &&
!Exp::BlockEntry().Matches(INPUT))) {
break;
}
PopIndent();
}
while (!m_indents.empty() &&
m_indents.top()->status == IndentMarker::INVALID) {
PopIndent();
}
}
// PopAllIndents
// . Pops every indentation down to, but not including, the NONE marker.
// . Does nothing in flow context.
void Scanner::PopAllIndents() {
  // are we in flow?
  if (InFlowContext()) {
    return;
  }

  // now pop away
  while (!m_indents.empty() &&
         m_indents.top()->type != IndentMarker::NONE) {
    PopIndent();
  }
}
void Scanner::PopIndent() {
const IndentMarker& indent = *m_indents.top();
m_indents.pop();
if (indent.status != IndentMarker::VALID) {
InvalidateSimpleKey();
return;
}
if (indent.type == IndentMarker::SEQ) {
m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
} else if (indent.type == IndentMarker::MAP) {
m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
}
}
// GetTopIndent
// . Returns the column of the innermost indentation, or 0 when the indent
//   stack is empty.
int Scanner::GetTopIndent() const {
  return m_indents.empty() ? 0 : m_indents.top()->column;
}
void Scanner::ThrowParserException(const std::string& msg) const {
Mark mark = Mark::null_mark();
if (!m_tokens.empty()) {
const Token& token = m_tokens.front();
mark = token.mark;
}
throw ParserException(mark, msg);
}
} // namespace YAML