blob: e99625b60c6e6b4b44cb345bd12b5cb789b072ae [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef avro_parsing_Symbol_hh__
#define avro_parsing_Symbol_hh__
#include <vector>
#include <map>
#include <stack>
#include <sstream>
#include <boost/any.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/weak_ptr.hpp>
#include <boost/tuple/tuple.hpp>
#include "Node.hh"
#include "Decoder.hh"
#include "Exception.hh"
namespace avro {
namespace parsing {
class Symbol;
typedef std::vector<Symbol> Production;
typedef boost::tuple<size_t, bool, Production, Production> RepeaterInfo;
typedef boost::tuple<Production, Production> RootInfo;
class Symbol {
public:
enum Kind {
sTerminalLow, // extra has nothing
sNull,
sBool,
sInt,
sLong,
sFloat,
sDouble,
sString,
sBytes,
sArrayStart,
sArrayEnd,
sMapStart,
sMapEnd,
sFixed,
sEnum,
sUnion,
sTerminalHigh,
sSizeCheck, // Extra has size
sNameList, // Extra has a vector<string>
sRoot, // Root for a schema, extra is Symbol
sRepeater, // Array or Map, extra is symbol
sAlternative, // One of many (union), extra is Union
sPlaceholder, // To be fixed up later.
sIndirect, // extra is shared_ptr<Production>
sSymbolic, // extra is weal_ptr<Production>
sEnumAdjust,
sUnionAdjust,
sSkipStart,
sResolve,
sImplicitActionLow,
sRecordStart,
sRecordEnd,
sField, // extra is string
sRecord,
sSizeList,
sWriterUnion,
sImplicitActionHigh,
sError
};
private:
Kind kind_;
boost::any extra_;
explicit Symbol(Kind k) : kind_(k) { }
template <typename T> Symbol(Kind k, T t) : kind_(k), extra_(t) { }
public:
Kind kind() const {
return kind_;
}
template <typename T> T extra() const {
return boost::any_cast<T>(extra_);
}
template <typename T> T* extrap() {
return boost::any_cast<T>(&extra_);
}
template <typename T> const T* extrap() const {
return boost::any_cast<T>(&extra_);
}
template <typename T> void extra(const T& t) {
extra_ = t;
}
bool isTerminal() const {
return kind_ > sTerminalLow && kind_ < sTerminalHigh;
}
bool isImplicitAction() const {
return kind_ > sImplicitActionLow && kind_ < sImplicitActionHigh;
}
static const char* stringValues[];
static const char* toString(Kind k) {
return stringValues[k];
}
static Symbol rootSymbol(const Production& s)
{
return Symbol(Symbol::sRoot, RootInfo(s, Production()));
}
static Symbol rootSymbol(const Production& main, const Production& backup)
{
return Symbol(Symbol::sRoot, RootInfo(main, backup));
}
static Symbol nullSymbol() {
return Symbol(sNull);
}
static Symbol boolSymbol() {
return Symbol(sBool);
}
static Symbol intSymbol() {
return Symbol(sInt);
}
static Symbol longSymbol() {
return Symbol(sLong);
}
static Symbol floatSymbol() {
return Symbol(sFloat);
}
static Symbol doubleSymbol() {
return Symbol(sDouble);
}
static Symbol stringSymbol() {
return Symbol(sString);
}
static Symbol bytesSymbol() {
return Symbol(sBytes);
}
static Symbol sizeCheckSymbol(size_t s) {
return Symbol(sSizeCheck, s);
}
static Symbol fixedSymbol() {
return Symbol(sFixed);
}
static Symbol enumSymbol() {
return Symbol(sEnum);
}
static Symbol arrayStartSymbol() {
return Symbol(sArrayStart);
}
static Symbol arrayEndSymbol() {
return Symbol(sArrayEnd);
}
static Symbol mapStartSymbol() {
return Symbol(sMapStart);
}
static Symbol mapEndSymbol() {
return Symbol(sMapEnd);
}
static Symbol repeater(const Production& p, bool isArray) {
size_t s = 0;
return Symbol(sRepeater, boost::make_tuple(s, isArray, p, p));
}
static Symbol repeater(const Production& read, const Production& skip,
bool isArray) {
size_t s = 0;
return Symbol(sRepeater, boost::make_tuple(s, isArray, read, skip));
}
static Symbol alternative(const std::vector<Production>& branches)
{
return Symbol(Symbol::sAlternative, branches);
}
static Symbol unionSymbol() {
return Symbol(sUnion);
}
static Symbol recordStartSymbol() {
return Symbol(sRecordStart);
}
static Symbol recordEndSymbol() {
return Symbol(sRecordEnd);
}
static Symbol fieldSymbol(const std::string& name) {
return Symbol(sField, name);
}
static Symbol writerUnionAction() {
return Symbol(sWriterUnion);
}
static Symbol nameListSymbol(
const std::vector<std::string>& v) {
return Symbol(sNameList, v);
}
template <typename T>
static Symbol placeholder(const T& n) {
return Symbol(sPlaceholder, n);
}
static Symbol indirect(const boost::shared_ptr<Production>& p) {
return Symbol(sIndirect, p);
}
static Symbol symbolic(const boost::weak_ptr<Production>& p) {
return Symbol(sSymbolic, p);
}
static Symbol enumAdjustSymbol(const NodePtr& writer,
const NodePtr& reader);
static Symbol unionAdjustSymbol(size_t branch, const Production& p) {
return Symbol(sUnionAdjust, std::make_pair(branch, p));
}
static Symbol sizeListAction(std::vector<size_t> order) {
return Symbol(sSizeList, order);
}
static Symbol recordAction() {
return Symbol(sRecord);
}
static Symbol error(const NodePtr& writer, const NodePtr& reader);
static Symbol resolveSymbol(Kind w, Kind r) {
return Symbol(sResolve, std::make_pair(w, r));
}
static Symbol skipStart() {
return Symbol(sSkipStart);
}
};
template<typename T>
void fixup(Production& p,
const std::map<T, boost::shared_ptr<Production> > &m)
{
for (Production::iterator it = p.begin(); it != p.end(); ++it) {
fixup(*it, m);
}
}
template<typename T>
void fixup(Symbol& s, const std::map<T, boost::shared_ptr<Production> > &m)
{
switch (s.kind()) {
case Symbol::sIndirect:
fixup(*s.extra<boost::shared_ptr<Production> >(), m);
break;
case Symbol::sAlternative:
{
std::vector<Production> *vv = s.extrap<std::vector<Production> >();
for (std::vector<Production>::iterator it = vv->begin();
it != vv->end(); ++it) {
fixup(*it, m);
}
}
break;
case Symbol::sRepeater:
{
RepeaterInfo& ri = *s.extrap<RepeaterInfo>();
fixup(boost::tuples::get<2>(ri), m);
fixup(boost::tuples::get<3>(ri), m);
}
break;
case Symbol::sPlaceholder:
s = Symbol::symbolic(boost::weak_ptr<Production>(
m.find(s.extra<T>())->second));
break;
case Symbol::sUnionAdjust:
fixup(s.extrap<std::pair<size_t, Production> >()->second, m);
break;
}
}
template<typename Handler>
class SimpleParser {
Decoder* decoder_;
Handler& handler_;
std::stack<Symbol> parsingStack;
static void throwMismatch(Symbol::Kind expected, Symbol::Kind actual)
{
std::ostringstream oss;
oss << "Invalid operation. Expected: " <<
Symbol::toString(expected) << " got " <<
Symbol::toString(actual);
throw Exception(oss.str());
}
static void assertMatch(Symbol::Kind expected, Symbol::Kind actual)
{
if (expected != actual) {
throwMismatch(expected, actual);
}
}
void append(const Production& ss) {
for (Production::const_iterator it = ss.begin();
it != ss.end(); ++it) {
parsingStack.push(*it);
}
}
size_t popSize() {
const Symbol& s = parsingStack.top();
assertMatch(Symbol::sSizeCheck, s.kind());
size_t result = s.extra<size_t>();
parsingStack.pop();
return result;
}
static void assertLessThan(size_t n, size_t s) {
if (n >= s) {
std::ostringstream oss;
oss << "Size max value. Upper bound: " << s << " found " << n;
throw Exception(oss.str());
}
}
public:
Symbol::Kind advance(Symbol::Kind k) {
for (; ;) {
Symbol& s = parsingStack.top();
if (s.kind() == k) {
parsingStack.pop();
return k;
} else if (s.isTerminal()) {
throwMismatch(k, s.kind());
} else {
switch (s.kind()) {
case Symbol::sRoot:
append(boost::tuples::get<0>(*s.extrap<RootInfo>()));
continue;
case Symbol::sIndirect:
{
boost::shared_ptr<Production> pp =
s.extra<boost::shared_ptr<Production> >();
parsingStack.pop();
append(*pp);
}
continue;
case Symbol::sSymbolic:
{
boost::shared_ptr<Production> pp(
s.extra<boost::weak_ptr<Production> >());
parsingStack.pop();
append(*pp);
}
continue;
case Symbol::sRepeater:
{
RepeaterInfo *p = s.extrap<RepeaterInfo>();
--boost::tuples::get<0>(*p);
append(boost::tuples::get<2>(*p));
}
continue;
case Symbol::sError:
throw Exception(s.extra<std::string>());
case Symbol::sResolve:
{
const std::pair<Symbol::Kind, Symbol::Kind>* p =
s.extrap<std::pair<Symbol::Kind, Symbol::Kind> >();
assertMatch(p->second, k);
Symbol::Kind result = p->first;
parsingStack.pop();
return result;
}
case Symbol::sSkipStart:
parsingStack.pop();
skip(*decoder_);
break;
default:
if (s.isImplicitAction()) {
Symbol ss = s;
parsingStack.pop();
size_t n = handler_.handle(ss);
if (ss.kind() == Symbol::sWriterUnion) {
selectBranch(n);
}
} else {
std::ostringstream oss;
oss << "Encountered " << Symbol::toString(s.kind())
<< " while looking for " << Symbol::toString(k);
throw Exception(oss.str());
}
}
}
}
}
void skip(Decoder& d) {
const size_t sz = parsingStack.size();
if (sz == 0) {
throw Exception("Nothing to skip!");
}
while (parsingStack.size() >= sz) {
Symbol& t = parsingStack.top();
switch (t.kind()) {
case Symbol::sNull:
d.decodeNull();
break;
case Symbol::sBool:
d.decodeBool();
break;
case Symbol::sInt:
d.decodeInt();
break;
case Symbol::sLong:
d.decodeLong();
break;
case Symbol::sFloat:
d.decodeFloat();
break;
case Symbol::sDouble:
d.decodeDouble();
break;
case Symbol::sString:
d.skipString();
break;
case Symbol::sBytes:
d.skipBytes();
break;
case Symbol::sArrayStart:
{
parsingStack.pop();
size_t n = d.skipArray();
assertMatch(Symbol::sRepeater, parsingStack.top().kind());
if (n == 0) {
break;
}
Symbol& t = parsingStack.top();
RepeaterInfo *p = t.extrap<RepeaterInfo>();
boost::tuples::get<0>(*p) = n;
continue;
}
case Symbol::sArrayEnd:
break;
case Symbol::sMapStart:
{
parsingStack.pop();
size_t n = d.skipMap();
if (n == 0) {
break;
}
assertMatch(Symbol::sRepeater, parsingStack.top().kind());
Symbol& t = parsingStack.top();
RepeaterInfo *p = t.extrap<RepeaterInfo>();
boost::tuples::get<0>(*p) = n;
continue;
}
case Symbol::sMapEnd:
break;
case Symbol::sFixed:
{
parsingStack.pop();
Symbol& t = parsingStack.top();
d.decodeFixed(t.extra<size_t>());
}
break;
case Symbol::sEnum:
parsingStack.pop();
d.decodeEnum();
break;
case Symbol::sUnion:
{
parsingStack.pop();
size_t n = d.decodeUnionIndex();
selectBranch(n);
continue;
}
case Symbol::sRepeater:
{
RepeaterInfo *p = t.extrap<RepeaterInfo>();
if (boost::tuples::get<0>(*p) == 0) {
boost::tuples::get<0>(*p) =
boost::tuples::get<1>(*p) ? d.arrayNext() :
d.mapNext();
}
if (boost::tuples::get<0>(*p) != 0) {
--boost::tuples::get<0>(*p);
append(boost::tuples::get<3>(*p));
continue;
}
}
break;
case Symbol::sIndirect:
{
boost::shared_ptr<Production> pp =
t.extra<boost::shared_ptr<Production> >();
parsingStack.pop();
append(*pp);
}
continue;
case Symbol::sSymbolic:
{
boost::shared_ptr<Production> pp(
t.extra<boost::weak_ptr<Production> >());
parsingStack.pop();
append(*pp);
}
continue;
default:
{
std::ostringstream oss;
oss << "Don't know how to skip "
<< Symbol::toString(t.kind());
throw Exception(oss.str());
}
}
parsingStack.pop();
}
}
void assertSize(size_t n) {
size_t s = popSize();
if (s != n) {
std::ostringstream oss;
oss << "Incorrect size. Expected: " << s << " found " << n;
throw Exception(oss.str());
}
}
void assertLessThanSize(size_t n) {
assertLessThan(n, popSize());
}
size_t enumAdjust(size_t n) {
const Symbol& s = parsingStack.top();
assertMatch(Symbol::sEnumAdjust, s.kind());
const std::pair<std::vector<int>, std::vector<std::string> >* v =
s.extrap<std::pair<std::vector<int>, std::vector<std::string> > >();
assertLessThan(n, v->first.size());
int result = v->first[n];
if (result < 0) {
std::ostringstream oss;
oss << "Cannot resolve symbol: " << v->second[-result - 1]
<< std::endl;
throw Exception(oss.str());
}
parsingStack.pop();
return result;
}
size_t unionAdjust() {
const Symbol& s = parsingStack.top();
assertMatch(Symbol::sUnionAdjust, s.kind());
std::pair<size_t, Production> p = s.extra<std::pair<size_t,
Production> >();
parsingStack.pop();
append(p.second);
return p.first;
}
std::string nameForIndex(size_t e) {
const Symbol& s = parsingStack.top();
assertMatch(Symbol::sNameList, s.kind());
const std::vector<std::string> names =
s.extra<std::vector<std::string> >();
if (e >= names.size()) {
throw Exception("Not that many names");
}
std::string result = names[e];
parsingStack.pop();
return result;
}
size_t indexForName(const std::string &name) {
const Symbol& s = parsingStack.top();
assertMatch(Symbol::sNameList, s.kind());
const std::vector<std::string> names =
s.extra<std::vector<std::string> >();
std::vector<std::string>::const_iterator it =
std::find(names.begin(), names.end(), name);
if (it == names.end()) {
throw Exception("No such enum symbol");
}
size_t result = it - names.begin();
parsingStack.pop();
return result;
}
void setRepeatCount(size_t n) {
Symbol& s = parsingStack.top();
assertMatch(Symbol::sRepeater, s.kind());
size_t& nn = boost::tuples::get<0>(*s.extrap<RepeaterInfo>());
if (nn != 0) {
throw Exception("Wrong number of items");
}
nn = n;
}
void popRepeater() {
processImplicitActions();
const Symbol& s = parsingStack.top();
assertMatch(Symbol::sRepeater, s.kind());
if (boost::tuples::get<0>(*s.extrap<RepeaterInfo>()) != 0) {
throw Exception("Incorrect number of items");
}
parsingStack.pop();
}
void selectBranch(size_t n) {
const Symbol& s = parsingStack.top();
assertMatch(Symbol::sAlternative, s.kind());
std::vector<Production> v = s.extra<std::vector<Production> >();
if (n >= v.size()) {
throw Exception("Not that many branches");
}
parsingStack.pop();
append(v[n]);
}
const std::vector<size_t>& sizeList() {
const Symbol& s = parsingStack.top();
assertMatch(Symbol::sSizeList, s.kind());
return *s.extrap<std::vector<size_t> >();
}
Symbol::Kind top() const {
return parsingStack.top().kind();
}
void pop() {
parsingStack.pop();
}
void processImplicitActions() {
for (; ;) {
Symbol& s = parsingStack.top();
if (s.isImplicitAction()) {
handler_.handle(s);
parsingStack.pop();
} else {
break;
}
}
}
SimpleParser(const Symbol& s, Decoder* d, Handler& h) :
decoder_(d), handler_(h) {
parsingStack.push(s);
}
};
} // namespace parsing
} // namespace avro
#endif