blob: 3e68c8630c3a185a3fa2a9ea6261024a6ae9d650 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.tools.command;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.statistics.Statistics;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.FileMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
import org.apache.parquet.tools.util.PrettyPrintWriter;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
class MetadataUtils {
static void showDetails(PrettyPrintWriter out, ParquetMetadata meta, boolean showOriginalTypes) {
showDetails(out, meta.getFileMetaData(), showOriginalTypes);
long i = 1;
for (BlockMetaData bmeta : meta.getBlocks()) {
out.println();
showDetails(out, bmeta, i++);
}
}
static void showDetails(PrettyPrintWriter out, FileMetaData meta, boolean showOriginalTypes) {
out.format("creator: %s%n", meta.getCreatedBy());
Map<String,String> extra = meta.getKeyValueMetaData();
if (extra != null) {
for (Map.Entry<String,String> entry : meta.getKeyValueMetaData().entrySet()) {
out.print("extra: ");
out.incrementTabLevel();
out.format("%s = %s%n", entry.getKey(), entry.getValue());
out.decrementTabLevel();
}
}
out.println();
out.format("file schema: %s%n", meta.getSchema().getName());
out.rule('-');
showDetails(out, meta.getSchema(), showOriginalTypes);
}
private static void showDetails(PrettyPrintWriter out, BlockMetaData meta, Long num) {
long rows = meta.getRowCount();
long tbs = meta.getTotalByteSize();
long offset = meta.getStartingPos();
out.format("row group%s: RC:%d TS:%d OFFSET:%d%n", (num == null ? "" : " " + num), rows, tbs, offset);
out.rule('-');
showDetails(out, meta.getColumns());
}
static void showDetails(PrettyPrintWriter out, List<ColumnChunkMetaData> ccmeta) {
Map<String,Object> chunks = new LinkedHashMap<String,Object>();
for (ColumnChunkMetaData cmeta : ccmeta) {
String[] path = cmeta.getPath().toArray();
Map<String,Object> current = chunks;
for (int i = 0; i < path.length - 1; ++i) {
String next = path[i];
if (!current.containsKey(next)) {
current.put(next, new LinkedHashMap<String,Object>());
}
current = (Map<String,Object>)current.get(next);
}
current.put(path[path.length - 1], cmeta);
}
showColumnChunkDetails(out, chunks, 0);
}
private static void showColumnChunkDetails(PrettyPrintWriter out, Map<String,Object> current, int depth) {
for (Map.Entry<String,Object> entry : current.entrySet()) {
String name = Strings.repeat(".", depth) + entry.getKey();
Object value = entry.getValue();
if (value instanceof Map) {
out.println(name + ": ");
showColumnChunkDetails(out, (Map<String,Object>)value, depth + 1);
} else {
out.print(name + ": ");
showDetails(out, (ColumnChunkMetaData)value, false);
}
}
}
private static void showDetails(PrettyPrintWriter out, ColumnChunkMetaData meta, boolean name) {
long doff = meta.getDictionaryPageOffset();
long foff = meta.getFirstDataPageOffset();
long tsize = meta.getTotalSize();
long usize = meta.getTotalUncompressedSize();
long count = meta.getValueCount();
double ratio = usize / (double)tsize;
String encodings = Joiner.on(',').skipNulls().join(meta.getEncodings());
if (name) {
String path = Joiner.on('.').skipNulls().join(meta.getPath());
out.format("%s: ", path);
}
out.format(" %s", meta.getType());
out.format(" %s", meta.getCodec());
out.format(" DO:%d", doff);
out.format(" FPO:%d", foff);
out.format(" SZ:%d/%d/%.2f", tsize, usize, ratio);
out.format(" VC:%d", count);
if (!encodings.isEmpty()) out.format(" ENC:%s", encodings);
Statistics<?> stats = meta.getStatistics();
if (stats != null) {
out.format(" ST:[%s]", stats);
} else {
out.format(" ST:[none]");
}
out.println();
}
static void showDetails(PrettyPrintWriter out, MessageType type, boolean showOriginalTypes) {
List<String> cpath = new ArrayList<String>();
for (Type ftype : type.getFields()) {
showDetails(out, ftype, 0, type, cpath, showOriginalTypes);
}
}
private static void showDetails(PrettyPrintWriter out, GroupType type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
String name = Strings.repeat(".", depth) + type.getName();
Repetition rep = type.getRepetition();
int fcount = type.getFieldCount();
out.format("%s: %s F:%d%n", name, rep, fcount);
cpath.add(type.getName());
for (Type ftype : type.getFields()) {
showDetails(out, ftype, depth + 1, container, cpath, showOriginalTypes);
}
cpath.remove(cpath.size() - 1);
}
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
String name = Strings.repeat(".", depth) + type.getName();
Repetition rep = type.getRepetition();
PrimitiveTypeName ptype = type.getPrimitiveTypeName();
out.format("%s: %s %s", name, rep, ptype);
if (showOriginalTypes) {
OriginalType otype;
try {
otype = type.getOriginalType();
} catch (Exception e) {
otype = null;
}
if (otype != null) out.format(" O:%s", otype);
} else {
LogicalTypeAnnotation ltype = type.getLogicalTypeAnnotation();
if (ltype != null) out.format(" L:%s", ltype);
}
if (container != null) {
cpath.add(type.getName());
String[] paths = cpath.toArray(new String[0]);
cpath.remove(cpath.size() - 1);
ColumnDescriptor desc = container.getColumnDescription(paths);
int defl = desc.getMaxDefinitionLevel();
int repl = desc.getMaxRepetitionLevel();
out.format(" R:%d D:%d", repl, defl);
}
out.println();
}
private static void showDetails(PrettyPrintWriter out, Type type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
if (type instanceof GroupType) {
showDetails(out, type.asGroupType(), depth, container, cpath, showOriginalTypes);
return;
} else if (type instanceof PrimitiveType) {
showDetails(out, type.asPrimitiveType(), depth, container, cpath, showOriginalTypes);
return;
}
}
}