blob: 695440aec0e1ae3dbe8bd8fcdb109ba506cc86c8 [file] [log] [blame]
#include <sstream>
#include "exp.h"
#include "stream.h"
#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
namespace YAML {
struct Mark;
} // namespace YAML
namespace YAML {
namespace Exp {
unsigned ParseHex(const std::string& str, const Mark& mark) {
unsigned value = 0;
for (std::size_t i = 0; i < str.size(); i++) {
char ch = str[i];
int digit = 0;
if ('a' <= ch && ch <= 'f')
digit = ch - 'a' + 10;
else if ('A' <= ch && ch <= 'F')
digit = ch - 'A' + 10;
else if ('0' <= ch && ch <= '9')
digit = ch - '0';
else
throw ParserException(mark, ErrorMsg::INVALID_HEX);
value = (value << 4) + digit;
}
return value;
}
std::string Str(unsigned ch) { return std::string(1, static_cast<char>(ch)); }
// Escape
// . Translates the next 'codeLength' characters into a hex number and returns
// the result.
// . Throws if it's not actually hex.
std::string Escape(Stream& in, int codeLength) {
// grab string
std::string str;
for (int i = 0; i < codeLength; i++)
str += in.get();
// get the value
unsigned value = ParseHex(str, in.mark());
// legal unicode?
if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
std::stringstream msg;
msg << ErrorMsg::INVALID_UNICODE << value;
throw ParserException(in.mark(), msg.str());
}
// now break it up into chars
if (value <= 0x7F)
return Str(value);
else if (value <= 0x7FF)
return Str(0xC0 + (value >> 6)) + Str(0x80 + (value & 0x3F));
else if (value <= 0xFFFF)
return Str(0xE0 + (value >> 12)) + Str(0x80 + ((value >> 6) & 0x3F)) +
Str(0x80 + (value & 0x3F));
else
return Str(0xF0 + (value >> 18)) + Str(0x80 + ((value >> 12) & 0x3F)) +
Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F));
}
// Escape
// . Escapes the sequence starting 'in' (it must begin with a '\' or single
// quote)
// and returns the result.
// . Throws if it's an unknown escape character.
std::string Escape(Stream& in) {
// eat slash
char escape = in.get();
// switch on escape character
char ch = in.get();
// first do single quote, since it's easier
if (escape == '\'' && ch == '\'')
return "\'";
// now do the slash (we're not gonna check if it's a slash - you better pass
// one!)
switch (ch) {
case '0':
return std::string(1, '\x00');
case 'a':
return "\x07";
case 'b':
return "\x08";
case 't':
case '\t':
return "\x09";
case 'n':
return "\x0A";
case 'v':
return "\x0B";
case 'f':
return "\x0C";
case 'r':
return "\x0D";
case 'e':
return "\x1B";
case ' ':
return "\x20";
case '\"':
return "\"";
case '\'':
return "\'";
case '\\':
return "\\";
case '/':
return "/";
case 'N':
return "\x85";
case '_':
return "\xA0";
case 'L':
return "\xE2\x80\xA8"; // LS (#x2028)
case 'P':
return "\xE2\x80\xA9"; // PS (#x2029)
case 'x':
return Escape(in, 2);
case 'u':
return Escape(in, 4);
case 'U':
return Escape(in, 8);
}
std::stringstream msg;
throw ParserException(in.mark(), std::string(ErrorMsg::INVALID_ESCAPE) + ch);
}
}
}