blob: 3b668c7571269e10c5a2b840c909f32eafcca96c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.parquet;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.server.options.OptionManager;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.ParquetReadOptions;
import java.util.Objects;
/**
 * Stores consolidated parquet reading configuration. Configuration values can be obtained
 * from several sources, which {@link Builder#build()} applies in the following order
 * (a value assigned from a later source overrides the one assigned from an earlier source):
 * <ol>
 *   <li>parquet format config</li>
 *   <li>Hadoop configuration</li>
 *   <li>session options</li>
 * </ol>
 *
 * <p>Properties that hold their default values are not serialized
 * (see {@link JsonInclude.Include#NON_DEFAULT}) in order to keep the serialized object smaller.</p>
 */
@JsonInclude(JsonInclude.Include.NON_DEFAULT)
public class ParquetReaderConfig {

  /** Hadoop configuration key backing {@link #enableBytesReadCounter()}. */
  public static final String ENABLE_BYTES_READ_COUNTER = "parquet.benchmark.bytes.read";

  /** Hadoop configuration key backing {@link #enableBytesTotalCounter()}. */
  public static final String ENABLE_BYTES_TOTAL_COUNTER = "parquet.benchmark.bytes.total";

  /** Hadoop configuration key backing {@link #enableTimeReadCounter()}. */
  public static final String ENABLE_TIME_READ_COUNTER = "parquet.benchmark.time.read";

  /** Shared instance in which every property keeps its default value. */
  private static final ParquetReaderConfig DEFAULT_INSTANCE = new ParquetReaderConfig();

  // The initializers below are the default values of the corresponding properties.
  private boolean enableBytesReadCounter = false;
  private boolean enableBytesTotalCounter = false;
  private boolean enableTimeReadCounter = false;
  private boolean autoCorrectCorruptedDates = true;
  private boolean enableStringsSignedMinMax = false;

  /**
   * Creates a builder that assembles a reader config from the available configuration sources.
   *
   * @return new builder instance
   */
  public static ParquetReaderConfig.Builder builder() {
    return new ParquetReaderConfig.Builder();
  }

  /**
   * Returns the shared instance in which all properties have their default values.
   *
   * @return default reader config
   */
  public static ParquetReaderConfig getDefaultInstance() {
    return DEFAULT_INSTANCE;
  }

  /**
   * Creator used for deserialization. Every argument is optional: when {@code null} is passed,
   * the corresponding property keeps its default value.
   *
   * @param enableBytesReadCounter bytes read counter flag, may be {@code null}
   * @param enableBytesTotalCounter bytes total counter flag, may be {@code null}
   * @param enableTimeReadCounter time read counter flag, may be {@code null}
   * @param autoCorrectCorruptedDates corrupted dates auto-correction flag, may be {@code null}
   * @param enableStringsSignedMinMax strings signed min / max flag, may be {@code null}
   */
  @JsonCreator
  public ParquetReaderConfig(@JsonProperty("enableBytesReadCounter") Boolean enableBytesReadCounter,
                             @JsonProperty("enableBytesTotalCounter") Boolean enableBytesTotalCounter,
                             @JsonProperty("enableTimeReadCounter") Boolean enableTimeReadCounter,
                             @JsonProperty("autoCorrectCorruptedDates") Boolean autoCorrectCorruptedDates,
                             @JsonProperty("enableStringsSignedMinMax") Boolean enableStringsSignedMinMax) {
    // Field initializers have already assigned the defaults, so only non-null arguments are applied.
    if (enableBytesReadCounter != null) {
      this.enableBytesReadCounter = enableBytesReadCounter;
    }
    if (enableBytesTotalCounter != null) {
      this.enableBytesTotalCounter = enableBytesTotalCounter;
    }
    if (enableTimeReadCounter != null) {
      this.enableTimeReadCounter = enableTimeReadCounter;
    }
    if (autoCorrectCorruptedDates != null) {
      this.autoCorrectCorruptedDates = autoCorrectCorruptedDates;
    }
    if (enableStringsSignedMinMax != null) {
      this.enableStringsSignedMinMax = enableStringsSignedMinMax;
    }
  }

  /** Creates an instance with default values; used for the default instance and by {@link Builder}. */
  private ParquetReaderConfig() { }

  /**
   * @return value of the bytes read counter flag
   */
  @JsonProperty("enableBytesReadCounter")
  public boolean enableBytesReadCounter() {
    return enableBytesReadCounter;
  }

  /**
   * @return value of the bytes total counter flag
   */
  @JsonProperty("enableBytesTotalCounter")
  public boolean enableBytesTotalCounter() {
    return enableBytesTotalCounter;
  }

  /**
   * @return value of the time read counter flag
   */
  @JsonProperty("enableTimeReadCounter")
  public boolean enableTimeReadCounter() {
    return enableTimeReadCounter;
  }

  /**
   * @return value of the corrupted dates auto-correction flag
   */
  @JsonProperty("autoCorrectCorruptedDates")
  public boolean autoCorrectCorruptedDates() {
    return autoCorrectCorruptedDates;
  }

  /**
   * @return value of the strings signed min / max flag
   */
  @JsonProperty("enableStringsSignedMinMax")
  public boolean enableStringsSignedMinMax() {
    return enableStringsSignedMinMax;
  }

  /**
   * Builds parquet read options, passing the strings signed min / max flag to
   * {@code useSignedStringMinMax}.
   *
   * @return parquet read options
   */
  public ParquetReadOptions toReadOptions() {
    return ParquetReadOptions.builder()
      .useSignedStringMinMax(enableStringsSignedMinMax)
      .build();
  }

  /**
   * Creates a new configuration from the given one and sets the three counter flags
   * ({@link #ENABLE_BYTES_READ_COUNTER}, {@link #ENABLE_BYTES_TOTAL_COUNTER},
   * {@link #ENABLE_TIME_READ_COUNTER}) in it to the values held by this reader config.
   *
   * @param conf configuration the new configuration is created from
   * @return new configuration with the counter flags set
   */
  public Configuration addCountersToConf(Configuration conf) {
    Configuration newConfig = new Configuration(conf);
    newConfig.setBoolean(ENABLE_BYTES_READ_COUNTER, enableBytesReadCounter);
    newConfig.setBoolean(ENABLE_BYTES_TOTAL_COUNTER, enableBytesTotalCounter);
    newConfig.setBoolean(ENABLE_TIME_READ_COUNTER, enableTimeReadCounter);
    return newConfig;
  }

  @Override
  public int hashCode() {
    return Objects.hash(enableBytesReadCounter,
      enableBytesTotalCounter,
      enableTimeReadCounter,
      autoCorrectCorruptedDates,
      enableStringsSignedMinMax);
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    ParquetReaderConfig that = (ParquetReaderConfig) o;
    return enableBytesReadCounter == that.enableBytesReadCounter
      && enableBytesTotalCounter == that.enableBytesTotalCounter
      && enableTimeReadCounter == that.enableTimeReadCounter
      && autoCorrectCorruptedDates == that.autoCorrectCorruptedDates
      && enableStringsSignedMinMax == that.enableStringsSignedMinMax;
  }

  @Override
  public String toString() {
    return "ParquetReaderConfig{"
      + "enableBytesReadCounter=" + enableBytesReadCounter
      + ", enableBytesTotalCounter=" + enableBytesTotalCounter
      + ", enableTimeReadCounter=" + enableTimeReadCounter
      + ", autoCorrectCorruptedDates=" + autoCorrectCorruptedDates
      + ", enableStringsSignedMinMax=" + enableStringsSignedMinMax
      + '}';
  }

  /**
   * Collects the optional configuration sources and produces a {@link ParquetReaderConfig}
   * from them. Sources that were not provided (left {@code null}) are skipped.
   */
  public static class Builder {

    private ParquetFormatConfig formatConfig;
    private Configuration conf;
    private OptionManager options;

    /**
     * @param formatConfig parquet format config, may be {@code null}
     * @return this builder
     */
    public Builder withFormatConfig(ParquetFormatConfig formatConfig) {
      this.formatConfig = formatConfig;
      return this;
    }

    /**
     * @param conf Hadoop configuration, may be {@code null}
     * @return this builder
     */
    public Builder withConf(Configuration conf) {
      this.conf = conf;
      return this;
    }

    /**
     * @param options option manager, may be {@code null}
     * @return this builder
     */
    public Builder withOptions(OptionManager options) {
      this.options = options;
      return this;
    }

    /**
     * Creates a reader config that starts from the default values and then applies the provided
     * sources in order: format config, Hadoop configuration, session options.
     *
     * @return new reader config instance
     */
    public ParquetReaderConfig build() {
      ParquetReaderConfig readerConfig = new ParquetReaderConfig();

      // first assign configuration values from format config
      if (formatConfig != null) {
        readerConfig.autoCorrectCorruptedDates = formatConfig.areCorruptDatesAutoCorrected();
        readerConfig.enableStringsSignedMinMax = formatConfig.isStringsSignedMinMaxEnabled();
      }

      // then assign configuration values from Hadoop configuration,
      // keeping the current value when a key is not present
      if (conf != null) {
        readerConfig.enableBytesReadCounter = conf.getBoolean(ENABLE_BYTES_READ_COUNTER, readerConfig.enableBytesReadCounter);
        readerConfig.enableBytesTotalCounter = conf.getBoolean(ENABLE_BYTES_TOTAL_COUNTER, readerConfig.enableBytesTotalCounter);
        readerConfig.enableTimeReadCounter = conf.getBoolean(ENABLE_TIME_READ_COUNTER, readerConfig.enableTimeReadCounter);
      }

      // last assign values from session options, session options have higher priority than other configurations
      if (options != null) {
        String option = options.getOption(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX_VALIDATOR);
        // an empty option value leaves the value obtained from the format config in place
        if (!option.isEmpty()) {
          // parseBoolean yields the primitive directly, no intermediate Boolean is boxed and unboxed
          readerConfig.enableStringsSignedMinMax = Boolean.parseBoolean(option);
        }
      }

      return readerConfig;
    }
  }
}