blob: 13ab80b01eff850652c2e3c2babcc7be7e5a7cbe [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.bytes.ByteBufferAllocator;
import org.apache.parquet.compression.CompressionCodecFactory;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.format.converter.ParquetMetadataConverter.MetadataFilter;
import org.apache.parquet.hadoop.util.HadoopCodecs;
import java.util.Map;
import static org.apache.parquet.hadoop.ParquetInputFormat.COLUMN_INDEX_FILTERING_ENABLED;
import static org.apache.parquet.hadoop.ParquetInputFormat.DICTIONARY_FILTERING_ENABLED;
import static org.apache.parquet.hadoop.ParquetInputFormat.getFilter;
import static org.apache.parquet.hadoop.ParquetInputFormat.PAGE_VERIFY_CHECKSUM_ENABLED;
import static org.apache.parquet.hadoop.ParquetInputFormat.RECORD_FILTERING_ENABLED;
import static org.apache.parquet.hadoop.ParquetInputFormat.STATS_FILTERING_ENABLED;
import static org.apache.parquet.hadoop.UnmaterializableRecordCounter.BAD_RECORD_THRESHOLD_CONF_KEY;
/**
 * {@link ParquetReadOptions} that additionally keeps the Hadoop {@link Configuration}
 * it was created from.
 *
 * <p>Property lookups first ask the superclass; when that yields {@code null} the
 * lookup is answered from the configuration instead.
 */
public class HadoopReadOptions extends ParquetReadOptions {
  /** Configuration key read for the maximum allocation size. */
  private static final String ALLOCATION_SIZE = "parquet.read.allocation.size";

  /** Configuration key read for the signed string min/max switch. */
  private static final String SIGNED_MIN_MAX_KEY = "parquet.strings.signed-min-max.enabled";

  /** Value handed to {@code conf.getInt} as the default for {@link #ALLOCATION_SIZE} (8388608). */
  private static final int MAX_ALLOCATION_SIZE_FALLBACK = 8 * 1024 * 1024;

  private final Configuration conf;

  /**
   * Forwards every option to the {@link ParquetReadOptions} constructor unchanged and
   * stores {@code conf} for later lookups. Instances are created through {@link Builder#build()}.
   */
  private HadoopReadOptions(
      boolean useSignedStringMinMax,
      boolean useStatsFilter,
      boolean useDictionaryFilter,
      boolean useRecordFilter,
      boolean useColumnIndexFilter,
      boolean usePageChecksumVerification,
      FilterCompat.Filter recordFilter,
      MetadataFilter metadataFilter,
      CompressionCodecFactory codecFactory,
      ByteBufferAllocator allocator,
      int maxAllocationSize,
      Map<String, String> properties,
      Configuration conf) {
    super(
        useSignedStringMinMax,
        useStatsFilter,
        useDictionaryFilter,
        useRecordFilter,
        useColumnIndexFilter,
        usePageChecksumVerification,
        recordFilter,
        metadataFilter,
        codecFactory,
        allocator,
        maxAllocationSize,
        properties);
    this.conf = conf;
  }

  /**
   * Looks up a property, preferring the value known to the superclass.
   *
   * @param property the property name
   * @return the superclass value when it is non-null, otherwise {@code conf.get(property)}
   */
  @Override
  public String getProperty(String property) {
    final String fromOptions = super.getProperty(property);
    return fromOptions != null ? fromOptions : conf.get(property);
  }

  /**
   * @return the configuration this instance was built with
   */
  public Configuration getConf() {
    return conf;
  }

  /**
   * Creates a builder whose initial settings are read from {@code conf}.
   *
   * @param conf the Hadoop configuration to read settings from
   * @return a new {@link Builder}
   */
  public static Builder builder(Configuration conf) {
    return new Builder(conf);
  }

  /**
   * Builder that initializes its settings from a Hadoop {@link Configuration} and
   * produces {@link HadoopReadOptions} instances.
   */
  public static class Builder extends ParquetReadOptions.Builder {
    private final Configuration conf;

    /**
     * Reads the filter switches, checksum verification flag, record filter, maximum
     * allocation size and bad-record threshold from {@code conf} and applies them to
     * this builder, and installs a codec factory created from {@code conf}.
     *
     * @param conf the Hadoop configuration to read settings from
     */
    public Builder(Configuration conf) {
      this.conf = conf;

      // Boolean switches; the second argument is the default passed to the configuration.
      useSignedStringMinMax(conf.getBoolean(SIGNED_MIN_MAX_KEY, false));
      useDictionaryFilter(conf.getBoolean(DICTIONARY_FILTERING_ENABLED, true));
      useStatsFilter(conf.getBoolean(STATS_FILTERING_ENABLED, true));
      useRecordFilter(conf.getBoolean(RECORD_FILTERING_ENABLED, true));
      useColumnIndexFilter(conf.getBoolean(COLUMN_INDEX_FILTERING_ENABLED, true));

      // The builder's current field value serves as the default for checksum verification.
      final boolean verifyPageChecksums =
          conf.getBoolean(PAGE_VERIFY_CHECKSUM_ENABLED, usePageChecksumVerification);
      usePageChecksumVerification(verifyPageChecksums);

      withCodecFactory(HadoopCodecs.newFactory(conf, 0));
      withRecordFilter(getFilter(conf));
      withMaxAllocationInBytes(conf.getInt(ALLOCATION_SIZE, MAX_ALLOCATION_SIZE_FALLBACK));

      // Copy the bad-record threshold into the builder's properties only when it is set.
      final String badRecordThreshold = conf.get(BAD_RECORD_THRESHOLD_CONF_KEY);
      if (badRecordThreshold != null) {
        set(BAD_RECORD_THRESHOLD_CONF_KEY, badRecordThreshold);
      }
    }

    /**
     * @return a new {@link HadoopReadOptions} holding this builder's current settings
     *         and the configuration given at construction
     */
    @Override
    public ParquetReadOptions build() {
      return new HadoopReadOptions(
          useSignedStringMinMax,
          useStatsFilter,
          useDictionaryFilter,
          useRecordFilter,
          useColumnIndexFilter,
          usePageChecksumVerification,
          recordFilter,
          metadataFilter,
          codecFactory,
          allocator,
          maxAllocationSize,
          properties,
          conf);
    }
  }
}