blob: fd8758d78156e5625c377a33fa710ea6e00a337c [file] [log] [blame]
#include <sstream>
#include "exp.h"
#include "regex_yaml.h"
#include "regeximpl.h"
#include "scanner.h"
#include "scanscalar.h"
#include "scantag.h" // IWYU pragma: keep
#include "tag.h" // IWYU pragma: keep
#include "token.h"
#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
#include "yaml-cpp/mark.h"
namespace YAML {
///////////////////////////////////////////////////////////////////////
// Specialization for scanning specific tokens
// Directive
// . Note: no semantic checking is done here (that's for the parser to do)
void Scanner::ScanDirective() {
std::string name;
std::vector<std::string> params;
// pop indents and simple keys
PopAllIndents();
PopAllSimpleKeys();
m_simpleKeyAllowed = false;
m_canBeJSONFlow = false;
// store pos and eat indicator
Token token(Token::DIRECTIVE, INPUT.mark());
INPUT.eat(1);
// read name
while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
token.value += INPUT.get();
// read parameters
while (1) {
// first get rid of whitespace
while (Exp::Blank().Matches(INPUT))
INPUT.eat(1);
// break on newline or comment
if (!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
break;
// now read parameter
std::string param;
while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
param += INPUT.get();
token.params.push_back(param);
}
m_tokens.push(token);
}
// DocStart
void Scanner::ScanDocStart() {
PopAllIndents();
PopAllSimpleKeys();
m_simpleKeyAllowed = false;
m_canBeJSONFlow = false;
// eat
Mark mark = INPUT.mark();
INPUT.eat(3);
m_tokens.push(Token(Token::DOC_START, mark));
}
// DocEnd
void Scanner::ScanDocEnd() {
PopAllIndents();
PopAllSimpleKeys();
m_simpleKeyAllowed = false;
m_canBeJSONFlow = false;
// eat
Mark mark = INPUT.mark();
INPUT.eat(3);
m_tokens.push(Token(Token::DOC_END, mark));
}
// FlowStart
void Scanner::ScanFlowStart() {
// flows can be simple keys
InsertPotentialSimpleKey();
m_simpleKeyAllowed = true;
m_canBeJSONFlow = false;
// eat
Mark mark = INPUT.mark();
char ch = INPUT.get();
FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
m_flows.push(flowType);
Token::TYPE type =
(flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
m_tokens.push(Token(type, mark));
}
// FlowEnd
void Scanner::ScanFlowEnd() {
if (InBlockContext())
throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
// we might have a solo entry in the flow context
if (InFlowContext()) {
if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
m_tokens.push(Token(Token::VALUE, INPUT.mark()));
else if (m_flows.top() == FLOW_SEQ)
InvalidateSimpleKey();
}
m_simpleKeyAllowed = false;
m_canBeJSONFlow = true;
// eat
Mark mark = INPUT.mark();
char ch = INPUT.get();
// check that it matches the start
FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
if (m_flows.top() != flowType)
throw ParserException(mark, ErrorMsg::FLOW_END);
m_flows.pop();
Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
m_tokens.push(Token(type, mark));
}
// FlowEntry
void Scanner::ScanFlowEntry() {
// we might have a solo entry in the flow context
if (InFlowContext()) {
if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
m_tokens.push(Token(Token::VALUE, INPUT.mark()));
else if (m_flows.top() == FLOW_SEQ)
InvalidateSimpleKey();
}
m_simpleKeyAllowed = true;
m_canBeJSONFlow = false;
// eat
Mark mark = INPUT.mark();
INPUT.eat(1);
m_tokens.push(Token(Token::FLOW_ENTRY, mark));
}
// BlockEntry
void Scanner::ScanBlockEntry() {
// we better be in the block context!
if (InFlowContext())
throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
// can we put it here?
if (!m_simpleKeyAllowed)
throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
PushIndentTo(INPUT.column(), IndentMarker::SEQ);
m_simpleKeyAllowed = true;
m_canBeJSONFlow = false;
// eat
Mark mark = INPUT.mark();
INPUT.eat(1);
m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
}
// Key
void Scanner::ScanKey() {
// handle keys diffently in the block context (and manage indents)
if (InBlockContext()) {
if (!m_simpleKeyAllowed)
throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
PushIndentTo(INPUT.column(), IndentMarker::MAP);
}
// can only put a simple key here if we're in block context
m_simpleKeyAllowed = InBlockContext();
// eat
Mark mark = INPUT.mark();
INPUT.eat(1);
m_tokens.push(Token(Token::KEY, mark));
}
// Value
void Scanner::ScanValue() {
// and check that simple key
bool isSimpleKey = VerifySimpleKey();
m_canBeJSONFlow = false;
if (isSimpleKey) {
// can't follow a simple key with another simple key (dunno why, though - it
// seems fine)
m_simpleKeyAllowed = false;
} else {
// handle values diffently in the block context (and manage indents)
if (InBlockContext()) {
if (!m_simpleKeyAllowed)
throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
PushIndentTo(INPUT.column(), IndentMarker::MAP);
}
// can only put a simple key here if we're in block context
m_simpleKeyAllowed = InBlockContext();
}
// eat
Mark mark = INPUT.mark();
INPUT.eat(1);
m_tokens.push(Token(Token::VALUE, mark));
}
// AnchorOrAlias
void Scanner::ScanAnchorOrAlias() {
bool alias;
std::string name;
// insert a potential simple key
InsertPotentialSimpleKey();
m_simpleKeyAllowed = false;
m_canBeJSONFlow = false;
// eat the indicator
Mark mark = INPUT.mark();
char indicator = INPUT.get();
alias = (indicator == Keys::Alias);
// now eat the content
while (INPUT && Exp::Anchor().Matches(INPUT))
name += INPUT.get();
// we need to have read SOMETHING!
if (name.empty())
throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND
: ErrorMsg::ANCHOR_NOT_FOUND);
// and needs to end correctly
if (INPUT && !Exp::AnchorEnd().Matches(INPUT))
throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS
: ErrorMsg::CHAR_IN_ANCHOR);
// and we're done
Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
token.value = name;
m_tokens.push(token);
}
// Tag
void Scanner::ScanTag() {
// insert a potential simple key
InsertPotentialSimpleKey();
m_simpleKeyAllowed = false;
m_canBeJSONFlow = false;
Token token(Token::TAG, INPUT.mark());
// eat the indicator
INPUT.get();
if (INPUT && INPUT.peek() == Keys::VerbatimTagStart) {
std::string tag = ScanVerbatimTag(INPUT);
token.value = tag;
token.data = Tag::VERBATIM;
} else {
bool canBeHandle;
token.value = ScanTagHandle(INPUT, canBeHandle);
if (!canBeHandle && token.value.empty())
token.data = Tag::NON_SPECIFIC;
else if (token.value.empty())
token.data = Tag::SECONDARY_HANDLE;
else
token.data = Tag::PRIMARY_HANDLE;
// is there a suffix?
if (canBeHandle && INPUT.peek() == Keys::Tag) {
// eat the indicator
INPUT.get();
token.params.push_back(ScanTagSuffix(INPUT));
token.data = Tag::NAMED_HANDLE;
}
}
m_tokens.push(token);
}
// PlainScalar
void Scanner::ScanPlainScalar() {
std::string scalar;
// set up the scanning parameters
ScanScalarParams params;
params.end =
(InFlowContext() ? &Exp::ScanScalarEndInFlow() : &Exp::ScanScalarEnd());
params.eatEnd = false;
params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
params.fold = FOLD_FLOW;
params.eatLeadingWhitespace = true;
params.trimTrailingSpaces = true;
params.chomp = STRIP;
params.onDocIndicator = BREAK;
params.onTabInIndentation = THROW;
// insert a potential simple key
InsertPotentialSimpleKey();
Mark mark = INPUT.mark();
scalar = ScanScalar(INPUT, params);
// can have a simple key only if we ended the scalar by starting a new line
m_simpleKeyAllowed = params.leadingSpaces;
m_canBeJSONFlow = false;
// finally, check and see if we ended on an illegal character
// if(Exp::IllegalCharInScalar.Matches(INPUT))
// throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
Token token(Token::PLAIN_SCALAR, mark);
token.value = scalar;
m_tokens.push(token);
}
// QuotedScalar
void Scanner::ScanQuotedScalar() {
std::string scalar;
// peek at single or double quote (don't eat because we need to preserve (for
// the time being) the input position)
char quote = INPUT.peek();
bool single = (quote == '\'');
// setup the scanning parameters
ScanScalarParams params;
RegEx end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote));
params.end = &end;
params.eatEnd = true;
params.escape = (single ? '\'' : '\\');
params.indent = 0;
params.fold = FOLD_FLOW;
params.eatLeadingWhitespace = true;
params.trimTrailingSpaces = false;
params.chomp = CLIP;
params.onDocIndicator = THROW;
// insert a potential simple key
InsertPotentialSimpleKey();
Mark mark = INPUT.mark();
// now eat that opening quote
INPUT.get();
// and scan
scalar = ScanScalar(INPUT, params);
m_simpleKeyAllowed = false;
m_canBeJSONFlow = true;
Token token(Token::NON_PLAIN_SCALAR, mark);
token.value = scalar;
m_tokens.push(token);
}
// BlockScalarToken
// . These need a little extra processing beforehand.
// . We need to scan the line where the indicator is (this doesn't count as part
// of the scalar),
// and then we need to figure out what level of indentation we'll be using.
void Scanner::ScanBlockScalar() {
std::string scalar;
ScanScalarParams params;
params.indent = 1;
params.detectIndent = true;
// eat block indicator ('|' or '>')
Mark mark = INPUT.mark();
char indicator = INPUT.get();
params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);
// eat chomping/indentation indicators
params.chomp = CLIP;
int n = Exp::Chomp().Match(INPUT);
for (int i = 0; i < n; i++) {
char ch = INPUT.get();
if (ch == '+')
params.chomp = KEEP;
else if (ch == '-')
params.chomp = STRIP;
else if (Exp::Digit().Matches(ch)) {
if (ch == '0')
throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
params.indent = ch - '0';
params.detectIndent = false;
}
}
// now eat whitespace
while (Exp::Blank().Matches(INPUT))
INPUT.eat(1);
// and comments to the end of the line
if (Exp::Comment().Matches(INPUT))
while (INPUT && !Exp::Break().Matches(INPUT))
INPUT.eat(1);
// if it's not a line break, then we ran into a bad character inline
if (INPUT && !Exp::Break().Matches(INPUT))
throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
// set the initial indentation
if (GetTopIndent() >= 0)
params.indent += GetTopIndent();
params.eatLeadingWhitespace = false;
params.trimTrailingSpaces = false;
params.onTabInIndentation = THROW;
scalar = ScanScalar(INPUT, params);
// simple keys always ok after block scalars (since we're gonna start a new
// line anyways)
m_simpleKeyAllowed = true;
m_canBeJSONFlow = false;
Token token(Token::NON_PLAIN_SCALAR, mark);
token.value = scalar;
m_tokens.push(token);
}
}