blob: beb1d1d05fce031b1fad72ad1fa1aabb1ba3e0ea [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.image;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.EndianUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Photoshop;
import org.apache.tika.metadata.TIFF;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
/**
* Parser for the Better Portable Graphics (BPG) File Format.
* <p>
* Documentation on the file format is available from
* http://bellard.org/bpg/bpg_spec.txt
*/
public class BPGParser extends AbstractImageParser {
    /** Extension tag identifying an EXIF block; handed to the EXIF extractor. */
    protected static final int EXTENSION_TAG_EXIF = 1;
    /** Extension tag for an ICC profile; not interpreted by this parser, its payload is skipped. */
    protected static final int EXTENSION_TAG_ICC_PROFILE = 2;
    /** Extension tag identifying an XMP packet; handed to {@link #handleXMP}. */
    protected static final int EXTENSION_TAG_XMP = 3;
    /** Extension tag for a thumbnail; not interpreted by this parser, its payload is skipped. */
    protected static final int EXTENSION_TAG_THUMBNAIL = 4;
    private static final long serialVersionUID = -161736541253892772L;
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
            new HashSet<>(
                    Arrays.asList(MediaType.image("x-bpg"), MediaType.image("bpg"))));

    /**
     * Returns the media types handled by this parser: {@code image/x-bpg} and
     * {@code image/bpg}.
     *
     * @param context the parse context (not consulted)
     * @return an unmodifiable set of the supported media types
     */
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }

    /**
     * Reads the BPG header and extension area from the stream and records bits per
     * sample, colour mode, width and height in {@code metadata}. EXIF and XMP
     * extensions are passed to an {@link ImageMetadataExtractor}; all other
     * extensions are skipped. The compressed picture data is not read.
     *
     * @param stream         stream positioned at the first byte of the BPG file
     * @param contentHandler not used by this method
     * @param metadata       receives the extracted values
     * @param parseContext   not used by this method
     * @throws IOException   if the stream cannot be read, or ends inside the signature
     *                       or inside an extension payload
     * @throws SAXException  if raised while handling an extension
     * @throws TikaException if the magic signature is wrong, the header is truncated,
     *                       or an extension declares an impossible length
     */
    @Override
    void extractMetadata(InputStream stream, ContentHandler contentHandler, Metadata metadata,
                         ParseContext parseContext)
            throws IOException, SAXException, TikaException {
        // Check for the magic header signature
        byte[] signature = new byte[4];
        IOUtils.readFully(stream, signature);
        if (signature[0] != (byte) 'B' || signature[1] != (byte) 'P' ||
                signature[2] != (byte) 'G' || signature[3] != (byte) 0xfb) {
            throw new TikaException("BPG magic signature invalid");
        }

        // The BPG bitstream packs bit fields starting at the MOST significant bit
        // of each byte, so the first field listed occupies the top bits.

        // First flag byte: pixel_format (3 bits) | alpha1_flag (1 bit) | bit_depth_minus_8 (4 bits)
        int flags1 = readHeaderByte(stream);
        // TODO Identify a suitable metadata key for the pixel format, (flags1 >> 5):
        //  Greyscale / 4:2:0 / 4:2:2 / 4:4:4
        // TODO Identify a suitable metadata key for the alpha flags,
        //  alpha1 = (flags1 & 0x10), alpha2 = (flags2 & 0x04)
        int bitDepth = (flags1 & 0x0F) + 8;
        metadata.set(TIFF.BITS_PER_SAMPLE, Integer.toString(bitDepth));

        // Second flag byte: color_space (4 bits) | extension_present_flag (1 bit)
        //  | alpha2_flag (1 bit) | limited_range_flag (1 bit) | 1 further flag bit
        int flags2 = readHeaderByte(stream);
        int colourSpace = (flags2 >> 4) & 0x0F;
        // NOTE(review): the labels for values 3 and 4 are kept exactly as they were;
        // they may correspond to an earlier revision of the specification - confirm.
        switch (colourSpace) {
            case 0:
                metadata.set(Photoshop.COLOR_MODE, "YCbCr Colour");
                break;
            case 1:
                metadata.set(Photoshop.COLOR_MODE, "RGB Colour");
                break;
            case 2:
                metadata.set(Photoshop.COLOR_MODE, "YCgCo Colour");
                break;
            case 3:
                metadata.set(Photoshop.COLOR_MODE, "YCbCrK Colour");
                break;
            case 4:
                metadata.set(Photoshop.COLOR_MODE, "CMYK Colour");
                break;
            default:
                // Unknown colour space: leave the colour mode unset
                break;
        }
        boolean hasExtensions = (flags2 & 0x08) != 0;

        // Width and height next
        int width = (int) EndianUtils.readUE7(stream);
        int height = (int) EndianUtils.readUE7(stream);
        metadata.set(TIFF.IMAGE_LENGTH, height);
        metadata.set(TIFF.IMAGE_WIDTH, width);

        // Picture data length: must be consumed to stay aligned, value not needed
        EndianUtils.readUE7(stream);

        // NOTE(review): this method used to read an extra "alpha data length" field at
        // this point whenever an alpha flag was set. The header layout described at
        // http://bellard.org/bpg/bpg_spec.txt does not appear to contain such a field,
        // and reading one would misalign the extension data, so it is no longer read.
        // Confirm whether files written to an older format revision need support.

        if (hasExtensions) {
            long extensionDataLength = EndianUtils.readUE7(stream);
            parseExtensions(stream, extensionDataLength, new ImageMetadataExtractor(metadata));
        }

        // HEVC Header + Data follow
        // We can't do anything with these parts
    }

    /**
     * Hands the raw bytes of an XMP extension to the metadata extractor.
     *
     * @param stream    stream positioned at the first byte of the XMP payload
     * @param xmpLength number of payload bytes to read
     * @param extractor extractor that receives the XMP bytes
     * @throws IOException   if fewer than {@code xmpLength} bytes can be read
     * @throws TikaException if {@code xmpLength} is negative, or raised by the extractor
     * @throws SAXException  if raised by the extractor
     */
    protected void handleXMP(InputStream stream, int xmpLength, ImageMetadataExtractor extractor)
            throws IOException, TikaException, SAXException {
        if (xmpLength < 0) {
            // Would otherwise surface as a NegativeArraySizeException
            throw new TikaException("BPG XMP extension length invalid: " + xmpLength);
        }
        byte[] xmp = new byte[xmpLength];
        IOUtils.readFully(stream, xmp);
        extractor.parseRawXMP(xmp);
    }

    /**
     * Walks the extension area, dispatching EXIF and XMP payloads and skipping the rest.
     * Both the payload bytes and the tag/length fields are charged against
     * {@code extensionDataLength}, so every iteration makes progress and the loop
     * cannot run past the declared extension area.
     */
    private void parseExtensions(InputStream stream, long extensionDataLength,
                                 ImageMetadataExtractor extractor)
            throws IOException, SAXException, TikaException {
        long remaining = extensionDataLength;
        while (remaining > 0) {
            long tag = EndianUtils.readUE7(stream);
            long length = EndianUtils.readUE7(stream);
            // Assumes the shortest ue7 encoding was used for both fields, as the
            // specification requires - a longer encoding would be under-counted here.
            remaining -= ue7Size(tag) + ue7Size(length);
            if (length < 0 || length > Integer.MAX_VALUE || length > remaining) {
                throw new TikaException("BPG extension length invalid: " + length);
            }
            int extensionLength = (int) length;
            // Compared as long so an oversized tag cannot alias a known one after narrowing
            if (tag == EXTENSION_TAG_EXIF) {
                extractor.parseRawExif(stream, extensionLength, true);
            } else if (tag == EXTENSION_TAG_XMP) {
                handleXMP(stream, extensionLength, extractor);
            } else {
                // InputStream.skip may skip fewer bytes than requested; skipFully does not
                IOUtils.skipFully(stream, extensionLength);
            }
            remaining -= extensionLength;
        }
    }

    /** Reads one header byte, rejecting end-of-stream instead of decoding -1 as flag bits. */
    private static int readHeaderByte(InputStream stream) throws IOException, TikaException {
        int value = stream.read();
        if (value < 0) {
            throw new TikaException("BPG header truncated");
        }
        return value;
    }

    /** Returns the number of bytes the shortest ue7 encoding of {@code value} occupies. */
    private static int ue7Size(long value) {
        int size = 1;
        long rest = value >>> 7;
        while (rest != 0) {
            size++;
            rest >>>= 7;
        }
        return size;
    }
}