blob: 039ebb1a9e72f23f996499c819b924ad9eaa8d40 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id: XMPPacketParser.java 750418 2009-03-05 11:03:54Z vhennebert $ */
package org.apache.tika.parser.image.xmp;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
/**
* This class scans a stream for XMP packets. It locates the first XMP packet in the stream
* and copies the raw packet content to an output stream; it does not interpret the XMP itself.
* <p>
* Important: Before you use this class to look for an XMP packet in some random file, please read
* the chapter on "Scanning Files for XMP Packets" in the XMP specification!
* <p>
* This class was branched from http://xmlgraphics.apache.org/ XMPPacketParser.
* See also org.semanticdesktop.aperture.extractor.xmp.XMPExtractor, a variant.
*/
public class XMPPacketScanner {

    /** Start of the packet header processing instruction, up to its {@code begin} attribute. */
    private static final byte[] PACKET_HEADER =
            "<?xpacket begin=".getBytes(StandardCharsets.US_ASCII);

    /** Terminator of the packet header processing instruction. */
    private static final byte[] PACKET_HEADER_END = "?>".getBytes(StandardCharsets.US_ASCII);

    /** Start of the packet trailer processing instruction. */
    private static final byte[] PACKET_TRAILER = "<?xpacket".getBytes(StandardCharsets.US_ASCII);

    /**
     * Locates the first XMP packet in a stream and copies its raw content to {@code xmlOut}.
     * <p>
     * The bytes copied are those between the end of the packet header
     * ({@code <?xpacket begin=... ?>}) and the start of the packet trailer ({@code <?xpacket});
     * neither the header nor the trailer is written. Only the first packet is considered; this
     * method cannot determine whether it is the right one if the stream holds several packets.
     * <p>
     * Does <em>not</em> close either stream. On success the scanned stream has been consumed up
     * to and including the {@code <?xpacket} bytes that start the trailer. Note that when
     * {@code in} does not support mark/reset it is wrapped in a {@link BufferedInputStream}
     * for scanning, so the caller's original stream may have been read beyond that point.
     *
     * @param in     the InputStream to search
     * @param xmlOut the stream the packet content is written to; if {@code null} the content
     *               is skipped without being copied
     * @return true if an XMP packet was found, false if the stream ended without a packet header
     * @throws IOException if an I/O error occurs, if the packet header is not terminated by
     *                     {@code ?>}, or if no packet trailer follows the header
     */
    public boolean parse(InputStream in, OutputStream xmlOut) throws IOException {
        if (!in.markSupported()) {
            // Scanning reads one byte at a time, so buffer streams that look unbuffered.
            in = new BufferedInputStream(in);
        }
        if (!skipAfter(in, PACKET_HEADER)) {
            return false;
        }
        //TODO Inspect "begin" attribute!
        if (!skipAfter(in, PACKET_HEADER_END)) {
            throw new IOException("Invalid XMP packet header!");
        }
        //TODO Do with TeeInputStream when Commons IO 1.4 is available
        if (!skipAfter(in, PACKET_TRAILER, xmlOut)) {
            throw new IOException("XMP packet not properly terminated!");
        }
        return true;
    }

    /**
     * Consumes the stream up to and including the first occurrence of {@code match},
     * discarding everything that was read.
     *
     * @param in    the stream to consume
     * @param match the byte sequence to look for; must not be empty
     * @return true if {@code match} was found, false if the stream ended first
     * @throws IOException if reading fails
     */
    private static boolean skipAfter(InputStream in, byte[] match) throws IOException {
        return skipAfter(in, match, null);
    }

    /**
     * Consumes the stream up to and including the first occurrence of {@code match}. If
     * {@code out} is not {@code null}, every byte read that is not part of that occurrence
     * is copied to it, in order.
     * <p>
     * A fallback table (Knuth-Morris-Pratt) is used so that a failed partial match does not
     * hide a real match that starts inside it, e.g. {@code <<?xpacket} or {@code ??>}.
     *
     * @param in    the stream to consume
     * @param match the byte sequence to look for; must not be empty
     * @param out   receives the skipped bytes, or {@code null} to discard them
     * @return true if {@code match} was found, false if the stream ended first
     * @throws IOException if reading or writing fails
     */
    private static boolean skipAfter(InputStream in, byte[] match, OutputStream out)
            throws IOException {
        final int len = match.length;
        final int[] fallback = buildFallbackTable(match);
        int found = 0;
        int b;
        while ((b = in.read()) >= 0) {
            // Compare as bytes: read() yields 0..255 while array elements are signed.
            final byte current = (byte) b;
            while (found > 0 && current != match[found]) {
                // The pending bytes equal match[0..found). Keep the longest suffix of them
                // that is still a prefix of match; the bytes in front of it are final misses.
                final int kept = fallback[found - 1];
                if (out != null) {
                    out.write(match, 0, found - kept);
                }
                found = kept;
            }
            if (current == match[found]) {
                found++;
                if (found == len) {
                    return true;
                }
            } else if (out != null) {
                out.write(b);
            }
        }
        // End of stream: a pending partial match turned out not to be a match, so copy it.
        if (out != null && found > 0) {
            out.write(match, 0, found);
        }
        return false;
    }

    /**
     * Builds the Knuth-Morris-Pratt fallback table for a pattern: entry {@code i} is the length
     * of the longest proper prefix of {@code match[0..i]} that is also a suffix of it.
     */
    private static int[] buildFallbackTable(byte[] match) {
        final int[] fallback = new int[match.length];
        int prefixLen = 0;
        for (int i = 1; i < match.length; i++) {
            while (prefixLen > 0 && match[i] != match[prefixLen]) {
                prefixLen = fallback[prefixLen - 1];
            }
            if (match[i] == match[prefixLen]) {
                prefixLen++;
            }
            fallback[i] = prefixLen;
        }
        return fallback;
    }
}