blob: 6f1b45bfc5010bb8e2a94e4a34f87686b7bc139c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.detect;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import javax.xml.XMLConstants;
import javax.xml.namespace.QName;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.apache.tika.sax.OfflineContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
* Utility class that uses a {@link javax.xml.parsers.SAXParser} to determine
* the namespace URI and local name of the root element of an XML file.
*
* @since Apache Tika 0.4
*/
public class XmlRootExtractor {

    /**
     * Extracts the root element name from an XML document held in memory.
     *
     * @param data bytes of the XML document; may be {@code null}
     * @return the qualified name (namespace URI and local name) of the root
     *         element, or {@code null} if {@code data} is {@code null} or no
     *         start tag could be parsed from it
     */
    public QName extractRootElement(byte[] data) {
        if (data == null) {
            // Keep both overloads consistent: the stream variant reports a
            // missing input as "no root element" rather than throwing.
            return null;
        }
        return extractRootElement(new ByteArrayInputStream(data));
    }

    /**
     * Extracts the root element name from an XML stream. The handler aborts
     * parsing as soon as the first start tag has been reported, so the rest
     * of the document is never processed.
     *
     * @param stream stream positioned at the beginning of the XML document
     * @return the qualified name (namespace URI and local name) of the root
     *         element, or {@code null} if parsing failed before any start
     *         tag was seen
     * @since Apache Tika 0.9
     */
    public QName extractRootElement(InputStream stream) {
        ExtractorHandler handler = new ExtractorHandler();
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setNamespaceAware(true);
            factory.setValidating(false);
            enableSecureProcessing(factory);
            factory.newSAXParser().parse(
                    stream, new OfflineContentHandler(handler));
        } catch (Exception ignored) {
            // Deliberately best-effort. This catches both the SAXException the
            // handler throws to abort parsing after the root element and any
            // genuine failure (malformed input, I/O error, null stream). In
            // the latter case handler.rootElement is still null.
        }
        return handler.rootElement;
    }

    /**
     * Turns on the JAXP secure-processing feature when the parser supports
     * it. A parser that rejects the feature is used without it, so that root
     * element extraction does not silently stop working on such platforms.
     * NOTE(review): in that case the parser's built-in resource limits are
     * not applied; confirm this trade-off is acceptable for the deployment.
     */
    private static void enableSecureProcessing(SAXParserFactory factory) {
        try {
            factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        } catch (ParserConfigurationException ignored) {
            // Feature not available on this parser; continue without it.
        } catch (SAXException ignored) {
            // Covers SAXNotRecognizedException / SAXNotSupportedException;
            // continue without the feature.
        }
    }

    /**
     * SAX handler that records the qualified name of the first element it
     * sees and then aborts the parse by throwing a {@link SAXException}.
     */
    private static class ExtractorHandler extends DefaultHandler {

        /** Name of the first element reported, or null if none was seen. */
        private QName rootElement = null;

        @Override
        public void startElement(
                String uri, String local, String name, Attributes attributes)
                throws SAXException {
            this.rootElement = new QName(uri, local);
            // The first start tag is the root element; nothing else is
            // needed, so stop the parser here.
            throw new SAXException("Aborting: root element received");
        }
    }
}