// blob: 71cc7c0ab95e921d6e5fbe96c98b48b0439efd8c [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.orc.impl;
import static org.apache.orc.impl.ReaderImpl.extractMetadata;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.orc.CompressionCodec;
import org.apache.orc.CompressionKind;
import org.apache.orc.OrcFile;
import org.apache.orc.OrcProto;
import org.apache.orc.StripeInformation;
import org.apache.orc.StripeStatistics;
// TODO: Make OrcTail implement FileMetadata or Reader interface
/**
 * In-memory view of the tail of an ORC file: the parsed
 * {@link OrcProto.FileTail} (postscript and footer), the serialized bytes it
 * was read from, and a file modification time.
 *
 * <p>The stripe statistics metadata is decoded from the serialized tail on
 * first request and cached in this object. That lazy initialization is not
 * synchronized.
 */
public final class OrcTail {
  // postscript + footer - Serialized in OrcSplit
  private final OrcProto.FileTail fileTail;
  // serialized representation of metadata, footer and postscript
  private final ByteBuffer serializedTail;
  // used to invalidate cache entries
  private final long fileModificationTime;
  // lazily deserialized; stays null until getStripeStatisticsProto() decodes it
  private OrcProto.Metadata metadata;

  /**
   * Create a tail with no known modification time (recorded as -1).
   * @param fileTail the parsed postscript and footer
   * @param serializedTail the serialized tail bytes, or null if unavailable
   */
  public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail) {
    this(fileTail, serializedTail, -1);
  }

  /**
   * Create a tail.
   * @param fileTail the parsed postscript and footer
   * @param serializedTail the serialized tail bytes, or null if unavailable
   * @param fileModificationTime the modification time of the file
   */
  public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail,
                 long fileModificationTime) {
    this.fileTail = fileTail;
    this.serializedTail = serializedTail;
    this.fileModificationTime = fileModificationTime;
  }

  /**
   * @return the serialized tail buffer itself (not a copy); may be null
   */
  public ByteBuffer getSerializedTail() {
    return serializedTail;
  }

  /**
   * @return the modification time given at construction, or -1 if none was
   */
  public long getFileModificationTime() {
    return fileModificationTime;
  }

  /**
   * @return the footer of the file tail
   */
  public OrcProto.Footer getFooter() {
    return fileTail.getFooter();
  }

  /**
   * @return the postscript of the file tail
   */
  public OrcProto.PostScript getPostScript() {
    return fileTail.getPostscript();
  }

  /**
   * Resolve the writer version from the writer implementation recorded in
   * the footer and the version number recorded in the postscript.
   * @return the writer version
   */
  public OrcFile.WriterVersion getWriterVersion() {
    OrcProto.PostScript ps = fileTail.getPostscript();
    OrcProto.Footer footer = fileTail.getFooter();
    OrcFile.WriterImplementation writer =
        OrcFile.WriterImplementation.from(footer.getWriter());
    return OrcFile.WriterVersion.from(writer, ps.getWriterVersion());
  }

  /**
   * Wrap every stripe entry of the footer in a {@link StripeInformation}.
   * @return a new list with one element per stripe in the footer
   */
  public List<StripeInformation> getStripes() {
    OrcProto.Footer footer = fileTail.getFooter();
    List<StripeInformation> result = new ArrayList<>(footer.getStripesCount());
    for (OrcProto.StripeInformation stripeProto : footer.getStripesList()) {
      result.add(new ReaderImpl.StripeInformationImpl(stripeProto));
    }
    return result;
  }

  /**
   * @return the compression kind whose name matches the postscript's
   *         compression value
   */
  public CompressionKind getCompressionKind() {
    return CompressionKind.valueOf(fileTail.getPostscript().getCompression().name());
  }

  /**
   * @return the postscript's compression block size, narrowed to an int
   */
  public int getCompressionBufferSize() {
    return (int) fileTail.getPostscript().getCompressionBlockSize();
  }

  /**
   * Get the stripe statistics from the file tail.
   * This code is for compatibility with ORC 1.5.8, but doesn't handle
   * proleptic calendar well. In particular, it does not have access to a
   * Configuration object, so it takes writerUsedProlepticGregorian from the
   * calendar recorded in the footer (false when the footer has none) and
   * always uses false for convertToProlepticGregorian.
   * @return the stripe statistics
   * @throws IOException if the metadata section cannot be decoded
   * @deprecated the user should use Reader.getStripeStatistics instead.
   */
  @Deprecated
  public List<StripeStatistics> getStripeStatistics() throws IOException {
    OrcProto.Footer footer = fileTail.getFooter();
    boolean writerUsedProlepticGregorian = footer.hasCalendar()
        && footer.getCalendar() == OrcProto.CalendarKind.PROLEPTIC_GREGORIAN;
    return getStripeStatistics(writerUsedProlepticGregorian, false);
  }

  /**
   * Get the stripe statistics from the file tail using the given calendar
   * settings.
   * @param writerUsedProlepticGregorian passed through to each
   *        {@link StripeStatistics}
   * @param convertToProlepticGregorian passed through to each
   *        {@link StripeStatistics}
   * @return a new list with one entry per stripe; empty when no serialized
   *         tail is available
   * @throws IOException if the metadata section cannot be decoded
   */
  public List<StripeStatistics> getStripeStatistics(
      boolean writerUsedProlepticGregorian, boolean convertToProlepticGregorian)
      throws IOException {
    List<OrcProto.StripeStatistics> ssProto = getStripeStatisticsProto();
    if (ssProto == null) {
      return new ArrayList<>();
    }
    List<StripeStatistics> result = new ArrayList<>(ssProto.size());
    for (OrcProto.StripeStatistics ss : ssProto) {
      result.add(new StripeStatistics(ss.getColStatsList(),
          writerUsedProlepticGregorian, convertToProlepticGregorian));
    }
    return result;
  }

  /**
   * Get the protobuf stripe statistics, decoding the metadata section from
   * the serialized tail on the first call and reusing it afterwards.
   * @return the stripe statistics, or null when there is no serialized tail
   * @throws IOException if the metadata section cannot be decoded
   */
  public List<OrcProto.StripeStatistics> getStripeStatisticsProto() throws IOException {
    if (serializedTail == null) {
      return null;
    }
    if (metadata == null) {
      // Look the kind up once so the codec goes back under the same kind it
      // was borrowed with.
      CompressionKind kind = getCompressionKind();
      CompressionCodec codec = OrcCodecPool.getCodec(kind);
      try {
        metadata = extractMetadata(serializedTail, 0,
            getMetadataSize(), codec, getCompressionBufferSize());
      } finally {
        // Reset the buffer even when decoding fails, so a failed attempt does
        // not leave the shared buffer with a stale position/limit.
        // clear does not clear the contents but sets position to 0 and
        // limit = capacity
        serializedTail.clear();
        OrcCodecPool.returnCodec(kind, codec);
      }
    }
    return metadata.getStripeStatsList();
  }

  /**
   * @return the postscript's metadata length, narrowed to an int
   */
  public int getMetadataSize() {
    return (int) getPostScript().getMetadataLength();
  }

  /**
   * @return the type list of the footer
   */
  public List<OrcProto.Type> getTypes() {
    return getFooter().getTypesList();
  }

  /**
   * @return the parsed file tail given at construction
   */
  public OrcProto.FileTail getFileTail() {
    return fileTail;
  }

  /**
   * Build a copy of the file tail whose footer has its statistics cleared.
   * The file tail held by this object is not modified.
   * @return the reduced copy of the file tail
   */
  public OrcProto.FileTail getMinimalFileTail() {
    OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder(fileTail);
    OrcProto.Footer.Builder footerBuilder = OrcProto.Footer.newBuilder(fileTail.getFooter());
    footerBuilder.clearStatistics();
    fileTailBuilder.setFooter(footerBuilder.build());
    return fileTailBuilder.build();
  }
}