blob: 7c346b653df98c53c155efb0205b888d4a6a30d5 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.io.parsing;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.avro.Schema;
/**
* Symbol is the base of all symbols (terminals and non-terminals) of
* the grammar.
*/
public abstract class Symbol {
/*
* The type of symbol.
*/
public enum Kind {
/** terminal symbols which have no productions */
TERMINAL,
/** Start symbol for some grammar */
ROOT,
/** non-terminal symbol which is a sequence of one or more other symbols */
SEQUENCE,
/** non-termial to represent the contents of an array or map */
REPEATER,
/** non-terminal to represent the union */
ALTERNATIVE,
/** non-terminal action symbol which are automatically consumed */
IMPLICIT_ACTION,
/** non-terminal action symbol which is explicitly consumed */
EXPLICIT_ACTION
};
/// The kind of this symbol.
public final Kind kind;
/**
* The production for this symbol. If this symbol is a terminal
* this is <tt>null</tt>. Otherwise this holds the the sequence of
* the symbols that forms the production for this symbol. The
* sequence is in the reverse order of production. This is useful
* for easy copying onto parsing stack.
*
* Please note that this is a final. So the production for a symbol
* should be known before that symbol is constructed. This requirement
* cannot be met for those symbols which are recursive (e.g. a record that
* holds union a branch of which is the record itself). To resolve this
* problem, we initialize the symbol with an array of nulls. Later we
* fill the symbols. Not clean, but works. The other option is to not have
* this field a final. But keeping it final and thus keeping symbol immutable
* gives some confort. See various generators how we generate records.
*/
public final Symbol[] production;
/**
* Constructs a new symbol of the given kind <tt>kind</tt>.
*/
protected Symbol(Kind kind) {
this(kind, null);
}
protected Symbol(Kind kind, Symbol[] production) {
this.production = production;
this.kind = kind;
}
/**
* A convenience method to construct a root symbol.
*/
static Symbol root(Symbol... symbols) {
return new Root(symbols);
}
/**
* A convenience method to construct a sequence.
* @param production The constituent symbols of the sequence.
*/
static Symbol seq(Symbol... production) {
return new Sequence(production);
}
/**
* A convenience method to construct a repeater.
* @param symsToRepeat The symbols to repeat in the repeater.
*/
static Symbol repeat(Symbol endSymbol, Symbol... symsToRepeat) {
return new Repeater(endSymbol, symsToRepeat);
}
/**
* A convenience method to construct a union.
*/
static Symbol alt(Symbol[] symbols, String[] labels) {
return new Alternative(symbols, labels);
}
/**
* A convenience method to construct an ErrorAction.
* @param e
* @return
*/
static Symbol error(String e) {
return new ErrorAction(e);
}
/**
* A convenience method to construct a ResolvingAction.
* @param w The writer symbol
* @param r The reader symbol
*/
static Symbol resolve(Symbol w, Symbol r) {
return new ResolvingAction(w, r);
}
private static class Fixup {
public final Symbol[] symbols;
public final int pos;
public Fixup(Symbol[] symbols, int pos) {
this.symbols = symbols;
this.pos = pos;
}
}
public Symbol flatten(Map<Sequence, Sequence> map,
Map<Sequence, List<Fixup>> map2) {
return this;
}
public int flattenedSize() {
return 1;
}
/**
* Flattens the given sub-array of symbols into an sub-array of symbols. Every
* <tt>Sequence</tt> in the input are replaced by its production recursively.
* Non-<tt>Sequence</tt> symbols, they internally have other symbols
* those internal symbols also get flattened.
*
* The algorithm does a few tricks to handle recursive symbol definitions.
* In order to avoid infinite recursion with recursive symbols, we have a map
* of Symbol->Symbol. Before fully constructing a flattened symbol for a
* <tt>Sequence</tt> we insert an empty output symbol into the map and then
* start filling the production for the <tt>Sequence</tt>. If the same
* <tt>Sequence</tt> is encountered due to recursion, we simply return the
* (empty) output <tt>Sequence<tt> from the map. Then we actually fill out
* the production for the <tt>Sequence</tt>.
* As part of the flattening process we copy the production of
* <tt>Sequence</tt>s into larger arrays. If the original <tt>Sequence</tt>
* has not not be fully constructed yet, we copy a bunch of <tt>null</tt>s.
* Fix-up remembers all those <tt>null</tt> patches. The fix-ups gets finally
* filled when we know the symbols to occupy those patches.
*
* @param in The array of input symbols to flatten
* @param start The position where the input sub-array starts.
* @param out The output that receives the flattened list of symbols. The
* output array should have sufficient space to receive the expanded sub-array
* of symbols.
* @param skip The position where the output input sub-array starts.
* @param map A map of symbols which have already been expanded. Useful for
* handling recursive definitions and for caching.
* @param map2 A map to to store the list of fix-ups.
*/
static void flatten(Symbol[] in, int start,
Symbol[] out, int skip,
Map<Sequence, Sequence> map,
Map<Sequence, List<Fixup>> map2) {
for (int i = start, j = skip; i < in.length; i++) {
Symbol s = in[i].flatten(map, map2);
if (s instanceof Sequence) {
Symbol[] p = s.production;
List<Fixup> l = map2.get(s);
if (l == null) {
System.arraycopy(p, 0, out, j, p.length);
} else {
l.add(new Fixup(out, j));
}
j += p.length;
} else {
out[j++] = s;
}
}
}
/**
* Returns the amount of space required to flatten the given
* sub-array of symbols.
* @param symbols The array of input symbols.
* @param start The index where the subarray starts.
* @return The number of symbols that will be produced if one expands
* the given input.
*/
protected static int flattenedSize(Symbol[] symbols, int start) {
int result = 0;
for (int i = start; i < symbols.length; i++) {
if (symbols[i] instanceof Sequence) {
Sequence s = (Sequence) symbols[i];
result += s.flattenedSize();
} else {
result += 1;
}
}
return result;
}
private static class Terminal extends Symbol {
private final String printName;
public Terminal(String printName) {
super(Kind.TERMINAL);
this.printName = printName;
}
public String toString() { return printName; }
}
public static class ImplicitAction extends Symbol {
/**
* Set to <tt>true</tt> if and only if this implicit action is
* a trailing action. That is, it is an action that follows
* real symbol. E.g {@link Symbol#DEFAULT_END_ACTION}.
*/
public final boolean isTrailing;
private ImplicitAction() {
this(false);
}
private ImplicitAction(boolean isTrailing) {
super(Kind.IMPLICIT_ACTION);
this.isTrailing = isTrailing;
}
}
protected static class Root extends Symbol {
private Root(Symbol... symbols) {
super(Kind.ROOT, makeProduction(symbols));
production[0] = this;
}
private static Symbol[] makeProduction(Symbol[] symbols) {
Symbol[] result = new Symbol[flattenedSize(symbols, 0) + 1];
flatten(symbols, 0, result, 1,
new HashMap<Sequence, Sequence>(),
new HashMap<Sequence, List<Fixup>>());
return result;
}
}
protected static class Sequence extends Symbol implements Iterable<Symbol> {
private Sequence(Symbol[] productions) {
super(Kind.SEQUENCE, productions);
}
public Symbol get(int index) {
return production[index];
}
public int size() {
return production.length;
}
public Iterator<Symbol> iterator() {
return new Iterator<Symbol>() {
private int pos = production.length;
public boolean hasNext() {
return 0 < pos;
}
public Symbol next() {
if (0 < pos) {
return production[--pos];
} else {
throw new NoSuchElementException();
}
}
public void remove() {
throw new UnsupportedOperationException();
}
};
}
@Override
public Sequence flatten(Map<Sequence, Sequence> map,
Map<Sequence, List<Fixup>> map2) {
Sequence result = map.get(this);
if (result == null) {
result = new Sequence(new Symbol[flattenedSize()]);
map.put(this, result);
List<Fixup> l = new ArrayList<Fixup>();
map2.put(result, l);
flatten(production, 0,
result.production, 0, map, map2);
for (Fixup f : l) {
System.arraycopy(result.production, 0, f.symbols, f.pos,
result.production.length);
}
map2.remove(result);
}
return result;
}
@Override
public final int flattenedSize() {
return flattenedSize(production, 0);
}
}
public static class Repeater extends Symbol {
public final Symbol end;
private Repeater(Symbol end, Symbol... sequenceToRepeat) {
super(Kind.REPEATER, makeProduction(sequenceToRepeat));
this.end = end;
production[0] = this;
}
private static Symbol[] makeProduction(Symbol[] p) {
Symbol[] result = new Symbol[p.length + 1];
System.arraycopy(p, 0, result, 1, p.length);
return result;
}
@Override
public Repeater flatten(Map<Sequence, Sequence> map,
Map<Sequence, List<Fixup>> map2) {
Repeater result =
new Repeater(end, new Symbol[flattenedSize(production, 1)]);
flatten(production, 1, result.production, 1, map, map2);
return result;
}
}
public static class Alternative extends Symbol {
public final Symbol[] symbols;
public final String[] labels;
private Alternative(Symbol[] symbols, String[] labels) {
super(Kind.ALTERNATIVE);
this.symbols = symbols;
this.labels = labels;
}
public Symbol getSymbol(int index) {
return symbols[index];
}
public String getLabel(int index) {
return labels[index];
}
public int size() {
return symbols.length;
}
public int findLabel(String label) {
if (label != null) {
for (int i = 0; i < labels.length; i++) {
if (label.equals(labels[i])) {
return i;
}
}
}
return -1;
}
@Override
public Alternative flatten(Map<Sequence, Sequence> map,
Map<Sequence, List<Fixup>> map2) {
Symbol[] ss = new Symbol[symbols.length];
for (int i = 0; i < ss.length; i++) {
ss[i] = symbols[i].flatten(map, map2);
}
return new Alternative(ss, labels);
}
}
public static class ErrorAction extends ImplicitAction {
public final String msg;
private ErrorAction(String msg) {
this.msg = msg;
}
}
public static class IntCheckAction extends Symbol {
public final int size;
public IntCheckAction(int size) {
super(Kind.EXPLICIT_ACTION);
this.size = size;
}
}
public static class EnumAdjustAction extends IntCheckAction {
public final Object[] adjustments;
public EnumAdjustAction(int rsymCount, Object[] adjustments) {
super(rsymCount);
this.adjustments = adjustments;
}
}
public static class WriterUnionAction extends ImplicitAction {
}
public static class ResolvingAction extends ImplicitAction {
public final Symbol writer;
public final Symbol reader;
private ResolvingAction(Symbol writer, Symbol reader) {
this.writer = writer;
this.reader = reader;
}
@Override
public ResolvingAction flatten(Map<Sequence, Sequence> map,
Map<Sequence, List<Fixup>> map2) {
return new ResolvingAction(writer.flatten(map, map2),
reader.flatten(map, map2));
}
}
public static class SkipAction extends ImplicitAction {
public final Symbol symToSkip;
public SkipAction(Symbol symToSkip) {
this.symToSkip = symToSkip;
}
@Override
public SkipAction flatten(Map<Sequence, Sequence> map,
Map<Sequence, List<Fixup>> map2) {
return new SkipAction(symToSkip.flatten(map, map2));
}
}
public static class FieldAdjustAction extends ImplicitAction {
public final int rindex;
public final String fname;
public FieldAdjustAction(int rindex, String fname) {
this.rindex = rindex;
this.fname = fname;
}
}
public static final class FieldOrderAction extends ImplicitAction {
public final Schema.Field[] fields;
public FieldOrderAction(Schema.Field[] fields) {
this.fields = fields;
}
}
public static class DefaultStartAction extends ImplicitAction {
public final byte[] contents;
public DefaultStartAction(byte[] contents) {
this.contents = contents;
}
}
public static class UnionAdjustAction extends ImplicitAction {
public final int rindex;
public final Symbol symToParse;
public UnionAdjustAction(int rindex, Symbol symToParse) {
this.rindex = rindex;
this.symToParse = symToParse;
}
@Override
public UnionAdjustAction flatten(Map<Sequence, Sequence> map,
Map<Sequence, List<Fixup>> map2) {
return new UnionAdjustAction(rindex, symToParse.flatten(map, map2));
}
}
/** For JSON. */
public static class EnumLabelsAction extends IntCheckAction {
public final List<String> symbols;
public EnumLabelsAction(List<String> symbols) {
super(symbols.size());
this.symbols = symbols;
}
public String getLabel(int n) {
return symbols.get(n);
}
public int findLabel(String l) {
if (l != null) {
for (int i = 0; i < symbols.size(); i++) {
if (l.equals(symbols.get(i))) {
return i;
}
}
}
return -1;
}
}
/**
* The terminal symbols for the grammar.
*/
public static final Symbol NULL = new Symbol.Terminal("null");
public static final Symbol BOOLEAN = new Symbol.Terminal("boolean");
public static final Symbol INT = new Symbol.Terminal("int");
public static final Symbol LONG = new Symbol.Terminal("long");
public static final Symbol FLOAT = new Symbol.Terminal("float");
public static final Symbol DOUBLE = new Symbol.Terminal("double");
public static final Symbol STRING = new Symbol.Terminal("string");
public static final Symbol BYTES = new Symbol.Terminal("bytes");
public static final Symbol FIXED = new Symbol.Terminal("fixed");
public static final Symbol ENUM = new Symbol.Terminal("enum");
public static final Symbol UNION = new Symbol.Terminal("union");
public static final Symbol ARRAY_START = new Symbol.Terminal("array-start");
public static final Symbol ARRAY_END = new Symbol.Terminal("array-end");
public static final Symbol MAP_START = new Symbol.Terminal("map-start");
public static final Symbol MAP_END = new Symbol.Terminal("map-end");
public static final Symbol ITEM_END = new Symbol.Terminal("item-end");
/* a pseudo terminal used by parsers */
public static final Symbol FIELD_ACTION =
new Symbol.Terminal("field-action");
public static final Symbol RECORD_START = new ImplicitAction(false);
public static final Symbol RECORD_END = new ImplicitAction(true);
public static final Symbol UNION_END = new ImplicitAction(true);
public static final Symbol DEFAULT_END_ACTION = new ImplicitAction(true);
public static final Symbol MAP_KEY_MARKER =
new Symbol.Terminal("map-key-marker");
}