// blob: 14b27039a36b694ab1cf53e57c1e006da38a7265 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.parquet.metadata;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import java.util.List;
import java.util.Map;
import static org.apache.drill.exec.store.parquet.metadata.MetadataVersion.Constants.V1;
import static org.apache.drill.exec.store.parquet.metadata.MetadataVersion.Constants.V2;
import static org.apache.drill.exec.store.parquet.metadata.MetadataVersion.Constants.V3;
import static org.apache.drill.exec.store.parquet.metadata.MetadataVersion.Constants.V3_1;
import static org.apache.drill.exec.store.parquet.metadata.MetadataVersion.Constants.V3_2;
import static org.apache.drill.exec.store.parquet.metadata.MetadataVersion.Constants.V3_3;
import static org.apache.drill.exec.store.parquet.metadata.MetadataVersion.Constants.V4;
import static org.apache.drill.exec.store.parquet.metadata.MetadataVersion.Constants.V4_1;
import static org.apache.drill.exec.store.parquet.metadata.MetadataVersion.Constants.V4_2;
public class MetadataBase {
/**
 * Base class for parquet table metadata. Subclasses are JSON-serializable structures, one per
 * metadata version, that describe an entire parquet directory structure.
 * <p>
 * When a code change affects the content of the metadata files, update the metadata version:
 * <ul>
 * <li>bump the major version if the metadata structure changes;</li>
 * <li>bump the minor version if only the metadata content changes and the structure stays the same.</li>
 * </ul>
 * <p>
 * Note: keep the versions registered below synchronized with {@link MetadataVersion.Constants}.
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "metadata_version", visible = true)
@JsonSubTypes({
  // Several minor versions map onto the same concrete class: V3.x -> v3, V4.x -> v4.
  @JsonSubTypes.Type(value = Metadata_V1.ParquetTableMetadata_v1.class, name = V1),
  @JsonSubTypes.Type(value = Metadata_V2.ParquetTableMetadata_v2.class, name = V2),
  @JsonSubTypes.Type(value = Metadata_V3.ParquetTableMetadata_v3.class, name = V3),
  @JsonSubTypes.Type(value = Metadata_V3.ParquetTableMetadata_v3.class, name = V3_1),
  @JsonSubTypes.Type(value = Metadata_V3.ParquetTableMetadata_v3.class, name = V3_2),
  @JsonSubTypes.Type(value = Metadata_V3.ParquetTableMetadata_v3.class, name = V3_3),
  @JsonSubTypes.Type(value = Metadata_V4.ParquetTableMetadata_v4.class, name = V4),
  @JsonSubTypes.Type(value = Metadata_V4.ParquetTableMetadata_v4.class, name = V4_1),
  @JsonSubTypes.Type(value = Metadata_V4.ParquetTableMetadata_v4.class, name = V4_2),
})
public static abstract class ParquetTableMetadataBase {

  // Every member below except hasColumnMetadata() is annotated with @JsonIgnore.
  // Each concrete metadata version supplies the implementations.

  @JsonIgnore
  public abstract List<Path> getDirectories();

  @JsonIgnore
  public abstract List<? extends ParquetFileMetadata> getFiles();

  /**
   * @param newFiles file metadata entries to assign to this table metadata
   */
  @JsonIgnore
  public abstract void assignFiles(List<? extends ParquetFileMetadata> newFiles);

  public abstract boolean hasColumnMetadata();

  // Per-column lookups. columnName is an array of name parts
  // (presumably the segments of the column path; confirm against the concrete versions).

  @JsonIgnore
  public abstract PrimitiveType.PrimitiveTypeName getPrimitiveType(String[] columnName);

  @JsonIgnore
  public abstract OriginalType getOriginalType(String[] columnName);

  @JsonIgnore
  public abstract Integer getRepetitionLevel(String[] columnName);

  @JsonIgnore
  public abstract Integer getDefinitionLevel(String[] columnName);

  @JsonIgnore
  public abstract Integer getScale(String[] columnName);

  @JsonIgnore
  public abstract Integer getPrecision(String[] columnName);

  @JsonIgnore
  public abstract boolean isRowGroupPrunable();

  @JsonIgnore
  public abstract ParquetTableMetadataBase clone();

  @JsonIgnore
  public abstract String getDrillVersion();

  @JsonIgnore
  public abstract String getMetadataVersion();

  @JsonIgnore
  public abstract List<? extends ColumnTypeMetadata> getColumnTypeInfoList();

  /**
   * Looks up the repetition of a column. This base implementation always returns {@code null};
   * subclasses may override it.
   *
   * @param columnName name parts of the column
   * @return {@code null} unless overridden
   */
  @JsonIgnore
  public Type.Repetition getRepetition(String[] columnName) {
    return null;
  }
}
/**
 * Base type for the per-file entries returned by {@link ParquetTableMetadataBase#getFiles()}.
 * All accessors are annotated with {@code @JsonIgnore}; concrete metadata versions implement them.
 */
public static abstract class ParquetFileMetadata {

  @JsonIgnore
  public abstract Path getPath();

  @JsonIgnore
  public abstract Long getLength();

  @JsonIgnore
  public abstract List<? extends RowGroupMetadata> getRowGroups();
}
/**
 * Base type for the row group entries returned by {@link ParquetFileMetadata#getRowGroups()}.
 * All members are annotated with {@code @JsonIgnore}.
 */
public static abstract class RowGroupMetadata {

  @JsonIgnore
  public abstract Long getStart();

  @JsonIgnore
  public abstract Long getLength();

  @JsonIgnore
  public abstract Long getRowCount();

  @JsonIgnore
  public abstract Map<String, Float> getHostAffinity();

  @JsonIgnore
  public abstract List<? extends ColumnMetadata> getColumns();

  /**
   * Tells whether this row group is known to contain no rows.
   *
   * @return {@code true} only when {@link #getRowCount()} is non-null and equal to zero;
   *         {@code false} when the row count is {@code null} or non-zero
   */
  @JsonIgnore
  public boolean isEmpty() {
    final Long count = getRowCount();
    if (count == null) {
      // An unknown row count is not treated as empty.
      return false;
    }
    return count == 0;
  }
}
/**
 * Base type for the column entries returned by {@link RowGroupMetadata#getColumns()}.
 * Concrete metadata versions implement the abstract accessors and mutators.
 */
public static abstract class ColumnMetadata {

  /**
   * Tells whether the number of nulls is defined. The value is considered defined when it is
   * neither {@code null} nor {@code -1}.
   *
   * @return true if nulls value is defined, false otherwise
   */
  @JsonIgnore
  public boolean isNumNullsSet() {
    Long nulls = getNulls();
    return nulls != null && nulls != -1;
  }

  public abstract String[] getName();

  public abstract Long getNulls();

  /**
   * @param rowCount row count to evaluate against
   *                 (presumably that of the enclosing row group; confirm with implementations)
   */
  public abstract boolean hasSingleValue(long rowCount);

  public abstract Object getMinValue();

  public abstract Object getMaxValue();

  /**
   * Set the max value recorded in the parquet metadata statistics.
   *
   * This object would just be immutable, but due to DRILL-4203 we need to correct
   * date values that had been corrupted by earlier versions of Drill.
   *
   * @param newMax the corrected max value
   */
  public abstract void setMax(Object newMax);

  /**
   * Set the min value recorded in the parquet metadata statistics.
   *
   * This object would just be immutable, but due to DRILL-4203 we need to correct
   * date values that had been corrupted by earlier versions of Drill.
   *
   * @param newMin the corrected min value
   */
  public abstract void setMin(Object newMin);

  public abstract PrimitiveType.PrimitiveTypeName getPrimitiveType();

  public abstract OriginalType getOriginalType();
}
/**
 * Base type for the column type entries returned by
 * {@link ParquetTableMetadataBase#getColumnTypeInfoList()}.
 */
public static abstract class ColumnTypeMetadata {

  public abstract PrimitiveType.PrimitiveTypeName getPrimitiveType();

  public abstract String[] getName();
}
}