blob: faded3358d08810a9e58333e58139f4b2aedcaaa [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mrql;
import org.apache.mrql.gen.*;
import java.util.Stack;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.AttributesImpl;
import java.io.PrintStream;
import java.nio.ByteBuffer;
/** Compiles XPath queries to SAX pipelines */
final public class XPathParser {
public MRDataHandler dataConstructor;
public XPathHandler handler;
final static PrintStream out = System.out;
static int cached_events = 0;
/** a SAX handler to form XPath pipelines */
abstract class XPathHandler extends DefaultHandler {
XPathHandler next; // the next handler in the pipeline
public XPathHandler ( XPathHandler next ) {
this.next = next;
}
public void startDocument () throws SAXException {
next.startDocument();
}
public void endDocument () throws SAXException {
next.endDocument();
}
abstract public void startElement ( String uri, String name, String tag, Attributes atts ) throws SAXException;
abstract public void endElement ( String uri, String name, String tag ) throws SAXException;
abstract public void characters ( char text[], int start, int length ) throws SAXException;
/** start a new predicate */
public void startPredicate ( int pred ) {
next.startPredicate(pred);
}
/** the end of a predicate */
public void endPredicate ( int pred ) {
next.endPredicate(pred);
}
public void releasePredicate ( int pred ) { // set the predicate outcome to true
next.releasePredicate(pred);
}
}
/** The end of the pipeline: Print the SAX stream to the output */
final class Print extends XPathHandler {
public Print () {
super(null);
}
public void startDocument () {}
public void endDocument () { out.println(); }
public void startElement ( String uri, String name, String tag, Attributes atts ) {
out.append("<").append(tag);
if (atts != null)
for (int i = 0; i < atts.getLength(); i++)
out.append(" ").append(atts.getQName(i))
.append("=\"").append(atts.getValue(i)).append("\"");
out.append(">");
}
public void endElement ( String uri, String name, String tag ) {
out.append("</").append(tag).append(">");
}
public void characters ( char text[], int start, int length ) {
for (int i = 0; i < length; i++)
out.append(text[start+i]);
}
public String toString () { return "print()"; }
}
/** A growable buffer for storing events as byte sequences */
final class Cache {
final static int buffer_size_increment = 1000;
public byte[] wrapped_buffer;
public ByteBuffer byteBuffer;
public Cache () {
wrapped_buffer = new byte[buffer_size_increment];
byteBuffer = ByteBuffer.wrap(wrapped_buffer);
}
private void grow ( int len ) {
len /= java.lang.Byte.SIZE;
while (len+byteBuffer.position() > byteBuffer.limit()) {
int pos = byteBuffer.position();
byte[] nb = new byte[wrapped_buffer.length+buffer_size_increment];
for (int i = 0; i < wrapped_buffer.length; i++)
nb[i] = wrapped_buffer[i];
wrapped_buffer = nb;
byteBuffer = ByteBuffer.wrap(nb);
byteBuffer.position(pos);
}
}
public void putByte ( byte n ) {
grow(8);
byteBuffer.put(n);
}
public void putShort ( short n ) {
grow(Short.SIZE);
byteBuffer.putShort(n);
}
public void putInt ( int n ) {
grow(Integer.SIZE);
byteBuffer.putInt(n);
}
public void putChars ( char text[], int start, int len ) {
grow(len*Character.SIZE+Integer.SIZE);
byteBuffer.putInt(len);
for (int i = 0; i < len; i++)
byteBuffer.putChar(text[start+i]);
}
public void putString ( String s ) {
grow(s.length()*Character.SIZE+Short.SIZE);
int len = s.length();
byteBuffer.putShort((short) len);
for (int i = 0; i < len; i++)
byteBuffer.putChar(s.charAt(i));
}
public byte getByte () { return byteBuffer.get(); }
public short getShort () { return byteBuffer.getShort(); }
public int getInt () { return byteBuffer.getInt(); }
public char[] getChars () {
int len = byteBuffer.getInt();
char[] buf = new char[len];
for (int i = 0; i < len; i++)
buf[i] = byteBuffer.getChar();
return buf;
}
public String getString () {
int len = byteBuffer.getShort();
char[] buf = new char[len];
for (int i = 0; i < len; i++)
buf[i] = byteBuffer.getChar();
return new String(buf);
}
public void cacheStartElement ( String uri, String name, String tag, Attributes atts ) {
cached_events++;
putByte((byte)0);
putString(uri);
putString(name);
putString(tag);
if (atts != null) {
putShort((short) atts.getLength());
for (int i = 0; i < atts.getLength(); i++) {
putString(atts.getQName(i));
putString(atts.getValue(i));
}
} else putShort((short) 0);
}
public void cacheEndElement ( String uri, String name, String tag ) {
cached_events++;
putByte((byte)1);
putString(uri);
putString(name);
putString(tag);
}
public void cacheCharacters ( char text[], int start, int length ) {
cached_events++;
putByte((byte)2);
putChars(text,start,length);
}
public void print () {
System.out.println(byteBuffer);
dump(new Print());
}
public void append ( Cache cache ) {
grow(cache.byteBuffer.position());
byte[] b = cache.byteBuffer.array();
byteBuffer.put(b,0,cache.byteBuffer.position());
cache.byteBuffer.clear();
}
/** regenerate the stream from buffer */
public void dump ( XPathHandler next ) {
int last = byteBuffer.position();
byteBuffer.position(0);
while (byteBuffer.position() < last)
try {
switch (getByte()) {
case 0:
String uri = getString();
String name = getString();
String tag = getString();
AttributesImpl atts = new AttributesImpl();
int len = getShort();
for (int i = 0; i < len; i++)
atts.addAttribute("","",getString(),"",getString());
next.startElement(uri,name,tag,atts);
break;
case 1:
next.endElement(getString(),getString(),getString());
break;
case 2:
char[] text = getChars();
next.characters(text,0,text.length);
}
} catch (SAXException e) {
throw new Error(e);
};
byteBuffer.clear();
}
}
/** Remove the start/end/releasePredicate events by storing some events in a buffer, when necessary */
final class Materialize extends XPathHandler {
final static int max_num_of_nested_predicates = 100;
final public Cache cache; // nested suspended events from predicates whose outcome is unknown
final int[] ids; // the ids of the predicates with suspended output
final int[] positions; // position of predicate events in the buffer
final boolean[] released; // true if the associated predicate is true
int top; // top of the stacks
public Materialize ( XPathHandler next ) {
super(next);
cache = new Cache();
ids = new int[max_num_of_nested_predicates];
positions = new int[max_num_of_nested_predicates];
released = new boolean[max_num_of_nested_predicates];
top = 0;
}
public void startElement ( String uri, String name, String tag, Attributes atts ) throws SAXException {
if (top > 0)
cache.cacheStartElement(uri,name,tag,atts);
else next.startElement(uri,name,tag,atts);
}
public void endElement ( String uri, String name, String tag ) throws SAXException {
if (top > 0)
cache.cacheEndElement(uri,name,tag);
else next.endElement(uri,name,tag);
}
public void characters ( char text[], int start, int length ) throws SAXException {
if (top > 0)
cache.cacheCharacters(text,start,length);
else next.characters(text,start,length);
}
public void startPredicate ( int pred ) {
if (top >= ids.length)
throw new Error("too many nested predicates");
positions[top] = cache.byteBuffer.position();
ids[top] = pred;
released[top++] = false;
}
public void endPredicate ( int pred ) {
if (top > 0 && ids[top-1] == pred)
cache.byteBuffer.position(positions[--top]).mark().reset();
}
public void releasePredicate ( int pred ) {
boolean flush = true;
for (int i = 0; i < top; i++)
if (ids[i] == pred)
released[i] = true;
else flush &= released[i];
if (top > 0 && flush) {
cache.dump(next);
top = 0;
}
}
public String toString () { return "materialize("+next+")"; }
}
/** return the children of the current nodes that have the given tagname */
final class Child extends XPathHandler {
final String tagname; // the tagname of the child
boolean keep; // are we keeping or skipping events?
short level; // the depth level of the current element
public Child ( String tagname, XPathHandler next ) {
super(next);
this.tagname = tagname;
keep = false;
level = 0;
}
public void startElement ( String nm, String ln, String qn, Attributes a ) throws SAXException {
if (level++ == 1)
keep = tagname.equals("*") || tagname.equals(qn);
if (keep)
next.startElement(nm,ln,qn,a);
}
public void endElement ( String nm, String ln, String qn ) throws SAXException {
if (keep)
next.endElement(nm,ln,qn);
if (--level == 1)
keep = false;
}
public void characters ( char[] text, int start, int length ) throws SAXException {
if (keep)
next.characters(text,start,length);
}
public String toString () { return "child("+tagname+","+next+")"; }
}
/** return the attribute value of the current nodes that have the given attribute name */
final class Attribute extends XPathHandler {
final String attributename;
short level; // the depth level of the current element
public Attribute ( String attribute_name, XPathHandler next ) {
super(next);
attributename = attribute_name;
level = 0;
}
public void startElement ( String nm, String ln, String qn, Attributes as ) throws SAXException {
if (level++ == 0)
for ( int i = 0; i < as.getLength(); i++ )
if (attributename.equals("*") || attributename.equals(as.getQName(i))) {
char[] s = as.getValue(i).toCharArray();
next.characters(s,0,s.length);
}
}
public void endElement ( String nm, String ln, String qn ) throws SAXException {
--level;
}
public void characters ( char[] text, int start, int length ) throws SAXException {
}
public String toString () { return "attribute("+attributename+","+next+")"; }
}
/** Return the descendants of the current nodes that have the given tagname.
* To handle nested elements with the same tagname, use Descendant
*/
final class SimpleDescendant extends XPathHandler {
final String tagname; // the tagname of the descendant
boolean keep; // are we keeping or skipping events?
public SimpleDescendant ( String tagname, XPathHandler next ) {
super(next);
this.tagname = tagname;
keep = false;
}
public void startElement ( String nm, String ln, String qn, Attributes a ) throws SAXException {
if (!keep)
keep = tagname.equals(qn);
if (keep)
next.startElement(nm,ln,qn,a);
}
public void endElement ( String nm, String ln, String qn ) throws SAXException {
if (keep) {
next.endElement(nm,ln,qn);
keep = !tagname.equals(qn);
}
}
public void characters ( char[] text, int start, int length ) throws SAXException {
if (keep)
next.characters(text,start,length);
}
public String toString () { return "simple_descendant("+tagname+","+next+")"; }
}
/** As efficient as SimpleDescendant when there are no nested elements with the same tagname.
* It caches only the inner nested subelements with the same tagname.
*/
final class Descendant extends XPathHandler {
final static int max_nested_level = 100;
final String tagname; // the tagname of the descendant
int level; // # of nested elements with the same tagname
final Cache[] cache; // cache[i] caches elements of level i-1
public Descendant ( String tagname, XPathHandler next ) {
super(next);
this.tagname = tagname;
cache = new Cache[max_nested_level]; // to be created lazily
level = 0;
}
public void startElement ( String nm, String ln, String qn, Attributes a ) throws SAXException {
if (tagname.equals(qn)) {
if (level > 0 && cache[level-1] == null)
cache[level-1] = new Cache();
level++;
};
for (int i = 1; i < level; i++)
cache[i-1].cacheStartElement(nm,ln,qn,a);
if (level > 0)
next.startElement(nm,ln,qn,a);
}
public void endElement ( String nm, String ln, String qn ) throws SAXException {
if (level > 0)
next.endElement(nm,ln,qn);
for (int i = 1; i < level; i++)
cache[i-1].cacheEndElement(nm,ln,qn);
if (tagname.equals(qn)) {
level--;
if (level == 0 && cache[0] != null)
cache[0].dump(next);
else if (level > 0 && cache[level] != null)
cache[level-1].append(cache[level]);
}
}
public void characters ( char[] text, int start, int length ) throws SAXException {
for (int i = 1; i < level; i++)
cache[i-1].cacheCharacters(text,start,length);
if (level > 0)
next.characters(text,start,length);
}
public String toString () { return "descendant("+tagname+","+next+")"; }
}
/** propagates all input signals to both next and condition streams but wraps each
* top-level element in the next stream with start/end Condition signals
*/
final class Predicate extends XPathHandler {
int level;
final XPathHandler condition; // false, if stream is empty
final int predicate_id; // the id of the predicate
public Predicate ( int predicate_id, XPathHandler condition, XPathHandler next ) {
super(next);
this.condition = condition;
this.predicate_id = predicate_id;
level = 0;
}
public void startPredicate ( int pred ) {
next.startPredicate(pred);
condition.startPredicate(pred);
}
public void endPredicate ( int pred ) {
next.endPredicate(pred);
condition.endPredicate(pred);
}
public void releasePredicate ( int pred ) {
next.releasePredicate(pred);
condition.releasePredicate(pred);
}
public void startDocument () throws SAXException {
next.startDocument();
condition.startDocument();
}
public void endDocument () throws SAXException {
next.endDocument();
condition.endDocument();
}
public void startElement ( String nm, String ln, String qn, Attributes a ) throws SAXException {
if (level++ == 0)
next.startPredicate(predicate_id);
next.startElement(nm,ln,qn,a);
condition.startElement(nm,ln,qn,a);
}
public void endElement ( String nm, String ln, String qn ) throws SAXException {
next.endElement(nm,ln,qn);
condition.endElement(nm,ln,qn);
if (--level == 0)
next.endPredicate(predicate_id);
}
public void characters ( char[] text, int start, int length ) throws SAXException {
next.characters(text,start,length);
condition.characters(text,start,length);
}
public String toString () { return "predicate("+predicate_id+","+condition+","+next+")"; }
}
/** generate a releasePredicate signal if the content of the input node is equal to text */
final class Equals extends XPathHandler {
final static int max_depth_of_nested_predicates = 100;
final String value; // the value to be tested for equality
final int predicate_id; // the id of the predicate
final int[] preds;
int top;
boolean suspended;
public Equals ( int predicate_id, String value, XPathHandler next ) {
super(next);
this.value = value;
this.predicate_id = predicate_id;
preds = new int[max_depth_of_nested_predicates];
top = 0;
suspended = false;
}
private boolean compare ( char[] text, int start, int length, String value ) {
if (length != value.length())
return false;
for (int i = 0; i < length; i++)
if (text[i+start] != value.charAt(i))
return false;
return true;
}
public void startPredicate ( int pred ) {
preds[top++] = pred;
}
public void endPredicate ( int pred ) {
suspended = false;
for (int i = 0; i < top; i++)
if (preds[i] == pred) {
preds[i] = preds[--top];
return;
}
}
public void releasePredicate ( int pred ) {
if (top == 1 && suspended)
next.releasePredicate(predicate_id);
endPredicate(pred);
}
public void startDocument () throws SAXException {}
public void endDocument () throws SAXException {}
public void startElement ( String nm, String ln, String qn, Attributes a ) throws SAXException {}
public void endElement ( String nm, String ln, String qn ) throws SAXException {}
public void characters ( char[] text, int start, int length ) throws SAXException {
if (compare(text,start,length,value))
if (top == 0)
next.releasePredicate(predicate_id);
else suspended = true;
}
public String toString () { return "equals("+predicate_id+","+value+","+next+")"; }
}
/** Converts the SAX data stream to MRData */
final class MRDataHandler extends XPathHandler {
Stack<Union> stack = new Stack<Union>();
Bag children;
public MRDataHandler () throws Exception {
super(null);
stack.clear();
Tuple t = new Tuple(3);
children = new Bag();
t.set(2,children);
stack.push(new Union((byte)0,t));
}
public void start () throws Exception {
stack.clear();
Tuple t = new Tuple(3);
children = new Bag();
t.set(2,children);
stack.push(new Union((byte)0,t));
}
public Bag value () {
if (stack.size() != 1)
return null;
else return ((Bag)((Tuple)stack.peek().value()).get(2));
}
public void startDocument () throws SAXException {}
public void endDocument () throws SAXException {}
public void startElement ( String nm, String ln, String qn, Attributes as ) throws SAXException {
children = new Bag();
Bag attributes = new Bag();
for ( int i = 0; i < as.getLength(); i++ )
attributes.add(new Tuple(new MR_string(as.getQName(i)),new MR_string(as.getValue(i))));
Tuple t = new Tuple(3);
t.set(0,new MR_string(qn));
t.set(1,attributes);
t.set(2,children);
stack.push(new Union((byte)0,t));
}
public void endElement ( String nm, String ln, String qn ) throws SAXException {
if (stack.empty())
throw new SAXException("Ill-formed XML elements: "+qn);
Union v = stack.pop();
if (!((MR_string)((Tuple)v.value()).get(0)).get().equals(qn))
throw new SAXException("Unmatched tags in XML element: "+qn);
children = (Bag)((Tuple)stack.peek().value()).get(2);
children.add(v);
}
public void characters ( char[] text, int start, int length ) throws SAXException {
String s = new String(text,start,length);
if (s.startsWith("{{") && s.endsWith("}}"))
children.add(new MR_variable(Integer.parseInt(s.substring(2,s.length()-2))));
else children.add(new Union((byte)1,new MR_string(s)));
}
public String toString () { return "MRDataHandler()"; }
}
public XPathParser ( Tree xpath ) throws Exception {
dataConstructor = new MRDataHandler();
handler = compile_xpath(xpath,new Materialize(dataConstructor),0);
}
public XPathHandler compile_xpath ( Tree xpath, XPathHandler next, int cn ) throws Exception {
if (xpath.is_variable() && xpath.toString().equals("dot"))
return next;
if (!xpath.is_node())
throw new Error("Unrecognized xpath query: "+xpath);
Node n = (Node) xpath;
if (n.name().equals("child")) {
String tag = n.children().head().toString();
XPathHandler c = (tag.charAt(0) == '@')
? new Attribute(tag.substring(1),next)
: new Child(tag,next);
return compile_xpath(n.children().tail().head(),c,cn);
} else if (n.name().equals("descendant")) {
XPathHandler c = new Descendant(n.children().head().toString(),next);
return compile_xpath(n.children().tail().head(),c,cn);
} else if (n.name().equals("eq")) {
Tree value = n.children().tail().head();
XPathHandler c = new Equals(cn,(value.is_string())
? ((StringLeaf)value).value()
: value.toString(),
next);
return compile_xpath(n.children().head(),c,cn);
} else if (n.name().equals("predicate")) {
XPathHandler c = new Predicate(cn+1,compile_xpath(n.children().head(),next,cn+1),next);
return compile_xpath(n.children().tail().head(),c,cn);
};
throw new Error("Unrecognized xpath query: "+xpath);
}
}