blob: 8253b8d18dc7be0f14ce52d7169e91a2962a2946 [file] [log] [blame]
#include "scanscalar.h"
#include <algorithm>
#include "exp.h"
#include "regeximpl.h"
#include "stream.h"
#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
namespace YAML {
// ScanScalar
// . This is where the scalar magic happens.
//
// . We do the scanning in three phases:
//   1. Scan until newline
//   2. Eat newline
//   3. Scan leading blanks.
//
// . Depending on the parameters given, we store or stop
//   at different places in the above flow.
//
// Parameters read from `params`:
//   end, eatEnd            - expression that terminates the scalar, and
//                            whether the matched terminator is consumed.
//   escape                 - escape character; when it is '\\' an escaped
//                            line break is also recognised.
//   fold                   - DONT_FOLD / FOLD_BLOCK / FOLD_FLOW; selects how
//                            line breaks are written into the result.
//   indent, detectIndent   - required indentation; with detectIndent set,
//                            `indent` is raised (never lowered) while no
//                            non-empty line has been seen yet.
//   eatLeadingWhitespace   - whether blanks after the indentation are eaten.
//   onDocIndicator         - BREAK or THROW when a document indicator is
//                            found in column 0; any other value ignores it.
//   onTabInIndentation     - THROW to reject a tab found before `indent`.
//   trimTrailingSpaces     - strip trailing ' ' from the result.
//   chomp                  - CLIP / STRIP handling of trailing '\n'; any
//                            other value leaves them untouched.
//
// Written back to `params`:
//   leadingSpaces          - reset to false on entry; set to true when the
//                            scan stops because a non-empty line is
//                            indented less than `indent`.
//   indent                 - see detectIndent above.
//
// Returns the scanned scalar text.
//
// Throws ParserException with
//   ErrorMsg::DOC_IN_SCALAR      - doc indicator and onDocIndicator == THROW
//   ErrorMsg::EOF_IN_SCALAR      - input exhausted while eatEnd is set
//   ErrorMsg::TAB_IN_INDENTATION - tab in indentation and
//                                  onTabInIndentation == THROW
std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
  bool foundNonEmptyLine = false;
  // Flow scalars have no opening line break to skip; for the other fold
  // modes the first break encountered is swallowed (see phase #3).
  bool pastOpeningBreak = (params.fold == FOLD_FLOW);
  bool emptyLine = false;
  bool moreIndented = false;
  // Number of consecutive empty lines seen while block-folding that have not
  // been written to `scalar` yet.
  int foldedNewlineCount = 0;
  bool foldedNewlineStartedMoreIndented = false;
  // scalar.size() as it was right after the most recent escape was handled,
  // i.e. the index one past the escaped text; npos if nothing was escaped.
  std::size_t lastEscapedChar = std::string::npos;
  std::string scalar;

  params.leadingSpaces = false;

  while (INPUT) {
    // ********************************
    // Phase #1: scan until line ending

    // Everything at or after this index is trailing blanks on the current
    // line (only used to trim them when flow-folding).
    std::size_t lastNonWhitespaceChar = scalar.size();
    bool escapedNewline = false;
    while (!params.end.Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
      if (!INPUT)
        break;

      // document indicator?
      if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
        if (params.onDocIndicator == BREAK)
          break;
        else if (params.onDocIndicator == THROW)
          throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
      }

      foundNonEmptyLine = true;
      pastOpeningBreak = true;

      // escaped newline? (only if we're escaping on slash)
      if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
        // eat escape character and get out (but preserve trailing whitespace!)
        INPUT.get();
        lastNonWhitespaceChar = scalar.size();
        lastEscapedChar = scalar.size();
        escapedNewline = true;
        break;
      }

      // escape this?
      if (INPUT.peek() == params.escape) {
        scalar += Exp::Escape(INPUT);
        // escaped text is never treated as trailing whitespace
        lastNonWhitespaceChar = scalar.size();
        lastEscapedChar = scalar.size();
        continue;
      }

      // otherwise, just add the character
      char ch = INPUT.get();
      scalar += ch;
      if (ch != ' ' && ch != '\t')
        lastNonWhitespaceChar = scalar.size();
    }

    // eof? if we're looking to eat something, then we throw
    if (!INPUT) {
      if (params.eatEnd)
        throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR);
      break;
    }

    // doc indicator? (the inner loop only left its own scope; leave the
    // outer loop as well)
    if (params.onDocIndicator == BREAK && INPUT.column() == 0 &&
        Exp::DocIndicator().Matches(INPUT))
      break;

    // are we done via character match?
    int n = params.end.Match(INPUT);
    if (n >= 0) {
      if (params.eatEnd)
        INPUT.eat(n);
      break;
    }

    // do we remove trailing whitespace?
    if (params.fold == FOLD_FLOW)
      scalar.erase(lastNonWhitespaceChar);

    // ********************************
    // Phase #2: eat line ending
    n = Exp::Break().Match(INPUT);
    INPUT.eat(n);

    // ********************************
    // Phase #3: scan initial spaces

    // first the required indentation (while auto-detecting and before any
    // non-empty line was seen, every leading space counts as indentation)
    while (INPUT.peek() == ' ' &&
           (INPUT.column() < params.indent ||
            (params.detectIndent && !foundNonEmptyLine)))
      INPUT.eat(1);

    // update indent if we're auto-detecting
    if (params.detectIndent && !foundNonEmptyLine)
      params.indent = std::max(params.indent, INPUT.column());

    // and then the rest of the whitespace
    while (Exp::Blank().Matches(INPUT)) {
      // we check for tabs that masquerade as indentation
      if (INPUT.peek() == '\t' && INPUT.column() < params.indent &&
          params.onTabInIndentation == THROW)
        throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);

      if (!params.eatLeadingWhitespace)
        break;

      INPUT.eat(1);
    }

    // was this an empty line?
    bool nextEmptyLine = Exp::Break().Matches(INPUT);
    // a blank still pending here means the line has whitespace beyond the
    // required indentation that was not eaten above
    bool nextMoreIndented = Exp::Blank().Matches(INPUT);
    // remember what kind of line preceded a run of empty lines
    if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
      foldedNewlineStartedMoreIndented = moreIndented;

    // for block scalars, we always start with a newline, so we should ignore it
    // (not fold or keep)
    if (pastOpeningBreak) {
      switch (params.fold) {
        case DONT_FOLD:
          scalar += '\n';
          break;
        case FOLD_BLOCK:
          if (!emptyLine && !nextEmptyLine && !moreIndented &&
              !nextMoreIndented && INPUT.column() >= params.indent)
            scalar += ' ';
          else if (nextEmptyLine)
            foldedNewlineCount++;
          else
            scalar += '\n';

          // end of a run of empty lines: flush the deferred newlines
          if (!nextEmptyLine && foldedNewlineCount > 0) {
            scalar.append(static_cast<std::size_t>(foldedNewlineCount - 1),
                          '\n');
            if (foldedNewlineStartedMoreIndented || nextMoreIndented ||
                !foundNonEmptyLine)
              scalar += '\n';
            foldedNewlineCount = 0;
          }
          break;
        case FOLD_FLOW:
          if (nextEmptyLine)
            scalar += '\n';
          else if (!emptyLine && !nextEmptyLine && !escapedNewline)
            scalar += ' ';
          break;
      }
    }

    emptyLine = nextEmptyLine;
    moreIndented = nextMoreIndented;
    pastOpeningBreak = true;

    // are we done via indentation?
    if (!emptyLine && INPUT.column() < params.indent) {
      params.leadingSpaces = true;
      break;
    }
  }

  // post-processing

  // Position of the last character of `scalar` that is not `trimmed`
  // (npos if there is none), moved forward to lastEscapedChar when an escape
  // was seen and the position would otherwise fall before it (or be npos).
  // NOTE(review): lastEscapedChar is one past the escaped text, so the erase
  // calls below keep one further character after an escape - confirm that
  // this is intended.
  const auto lastKeptPos = [&scalar, lastEscapedChar](char trimmed) {
    std::size_t pos = scalar.find_last_not_of(trimmed);
    if (lastEscapedChar != std::string::npos &&
        (pos < lastEscapedChar || pos == std::string::npos))
      pos = lastEscapedChar;
    return pos;
  };

  if (params.trimTrailingSpaces) {
    const std::size_t pos = lastKeptPos(' ');
    // npos never compares less than size(), so a scalar consisting solely of
    // unescaped spaces is left as it is
    if (pos < scalar.size())
      scalar.erase(pos + 1);
  }

  switch (params.chomp) {
    case CLIP: {
      // keep at most one character after the last kept position
      const std::size_t pos = lastKeptPos('\n');
      if (pos == std::string::npos)
        scalar.erase();
      else if (pos + 1 < scalar.size())
        scalar.erase(pos + 2);
    } break;
    case STRIP: {
      // drop everything after the last kept position
      const std::size_t pos = lastKeptPos('\n');
      if (pos == std::string::npos)
        scalar.erase();
      else if (pos < scalar.size())
        scalar.erase(pos + 1);
    } break;
    default:
      break;
  }

  return scalar;
}
}  // namespace YAML