// blob: ea5f153daafd501c47e35aeed97d91cc12486c67 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.sax.xpath;
import java.util.LinkedList;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
/**
 * Content handler decorator that forwards only those elements, attributes,
 * and text nodes that match the given XPath expression.
 * <p>
 * The position in the document is tracked with a stack of matchers: every
 * start tag saves the current matcher and descends into the new element,
 * and every end tag restores the most recently saved matcher.
 */
public class MatchingContentHandler extends ContentHandlerDecorator {

    /** Matchers saved by the start tags seen so far, most recent first. */
    private final LinkedList<Matcher> matchers = new LinkedList<Matcher>();

    /** Matcher that applies at the current position in the event stream. */
    private Matcher matcher;

    /**
     * Creates a filtering handler.
     *
     * @param delegate content handler handed to the decorator superclass
     * @param matcher matcher in effect before the first element is seen
     */
    public MatchingContentHandler(ContentHandler delegate, Matcher matcher) {
        super(delegate);
        this.matcher = matcher;
    }

    /**
     * Descends into the element and forwards its start tag when either the
     * element itself or at least one of its attributes matches. Only the
     * matching attributes are forwarded.
     *
     * @param uri namespace URI of the element
     * @param localName local name of the element
     * @param name qualified name of the element
     * @param attributes attributes attached to the element
     * @throws SAXException if the forwarded event fails
     */
    public void startElement(
            String uri, String localName, String name, Attributes attributes)
            throws SAXException {
        // Save the enclosing matcher so that endElement can restore it
        matchers.addFirst(matcher);
        matcher = matcher.descend(uri, localName);

        AttributesImpl accepted = selectMatchingAttributes(attributes);

        boolean emit = matcher.matchesElement() || accepted.getLength() > 0;
        if (!emit) {
            return;
        }
        super.startElement(uri, localName, name, accepted);

        if (matcher.matchesElement()) {
            return;
        }
        // The start tag was emitted only because of its attributes. Force
        // the matcher to match the current element, so that endElement
        // knows to emit the corresponding end tag.
        matcher = new CompositeMatcher(matcher, ElementMatcher.INSTANCE);
    }

    /**
     * Copies the attributes accepted by the current matcher into a new
     * attribute list, keeping their original order.
     *
     * @param attributes attributes of the element being started
     * @return the accepted attributes, possibly empty
     */
    private AttributesImpl selectMatchingAttributes(Attributes attributes) {
        AttributesImpl accepted = new AttributesImpl();
        int index = 0;
        while (index < attributes.getLength()) {
            String namespace = attributes.getURI(index);
            String local = attributes.getLocalName(index);
            if (matcher.matchesAttribute(namespace, local)) {
                accepted.addAttribute(
                        namespace, local, attributes.getQName(index),
                        attributes.getType(index), attributes.getValue(index));
            }
            index++;
        }
        return accepted;
    }

    /**
     * Forwards the end tag when the current matcher matches the element,
     * then restores the matcher saved by the corresponding start tag.
     *
     * @param uri namespace URI of the element
     * @param localName local name of the element
     * @param name qualified name of the element
     * @throws SAXException if the forwarded event fails
     */
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        if (matcher.matchesElement()) {
            super.endElement(uri, localName, name);
        }

        // Sometimes tagsoup returns double end tags, so the stack might
        // be empty! TODO: Remove this when the tagsoup problem is fixed.
        if (matchers.isEmpty()) {
            return;
        }
        matcher = matchers.removeFirst();
    }

    /**
     * Forwards character data when the current matcher matches text.
     *
     * @param ch buffer holding the characters
     * @param start offset of the first character in the buffer
     * @param length number of characters
     * @throws SAXException if the forwarded event fails
     */
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (!matcher.matchesText()) {
            return;
        }
        super.characters(ch, start, length);
    }

    /**
     * Forwards ignorable whitespace when the current matcher matches text.
     *
     * @param ch buffer holding the whitespace characters
     * @param start offset of the first character in the buffer
     * @param length number of characters
     * @throws SAXException if the forwarded event fails
     */
    public void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException {
        if (!matcher.matchesText()) {
            return;
        }
        super.ignorableWhitespace(ch, start, length);
    }

    /**
     * Drops the processing instruction; nothing is forwarded.
     *
     * @param target processing instruction target
     * @param data processing instruction data
     */
    public void processingInstruction(String target, String data) {
        // TODO: Support for matching processing instructions
    }

    /**
     * Forwards the skipped entity when the current matcher matches text.
     *
     * @param name name of the skipped entity
     * @throws SAXException if the forwarded event fails
     */
    public void skippedEntity(String name) throws SAXException {
        // TODO: Can skipped entities refer to more than text?
        if (!matcher.matchesText()) {
            return;
        }
        super.skippedEntity(name);
    }
}