blob: d9d8e1d3d40d79502eea60ac3afe5976c6fe4061 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.filter2.recordlevel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.ValueInspector;
import org.apache.parquet.io.PrimitiveColumnIO;
import org.apache.parquet.io.api.GroupConverter;
import org.apache.parquet.io.api.RecordMaterializer;
import static org.apache.parquet.Preconditions.checkNotNull;
/**
* A pass-through proxy for a {@link RecordMaterializer} that updates an {@link IncrementallyUpdatedFilterPredicate}
* as it receives concrete values for the current record. If, after the record assembly signals that
* there are no more values, the predicate indicates that this record should be dropped, {@link #getCurrentRecord()}
* returns null to signal that this record is being skipped.
* Otherwise, the record is retrieved from the delegate.
*/
public class FilteringRecordMaterializer<T> extends RecordMaterializer<T> {

  /** The wrapped materializer that actually produces records. */
  private final RecordMaterializer<T> delegate;

  /** Proxy placed in front of the delegate's root converter; created once in the constructor. */
  private final FilteringGroupConverter rootConverter;

  /** Stateful predicate that is evaluated and then reset on every {@link #getCurrentRecord()} call. */
  private final IncrementallyUpdatedFilterPredicate filterPredicate;

  /**
   * Creates a filtering proxy around {@code delegate}.
   *
   * @param delegate the materializer whose records are returned when the predicate accepts them
   * @param columnIOs the primitive columns; each one is indexed by its index field path
   * @param valueInspectorsByColumn the value inspectors per column path, handed unchanged to the
   *     proxied root converter
   * @param filterPredicate the predicate consulted for every record
   */
  public FilteringRecordMaterializer(
      RecordMaterializer<T> delegate,
      List<PrimitiveColumnIO> columnIOs,
      Map<ColumnPath, List<ValueInspector>> valueInspectorsByColumn,
      IncrementallyUpdatedFilterPredicate filterPredicate) {

    // The checks run in this order, so the first null argument in this sequence is the one reported.
    checkNotNull(columnIOs, "columnIOs");
    checkNotNull(valueInspectorsByColumn, "valueInspectorsByColumn");
    this.filterPredicate = checkNotNull(filterPredicate, "filterPredicate");
    this.delegate = checkNotNull(delegate, "delegate");

    // Index every primitive column by the (boxed) path of field indices that leads to it.
    Map<List<Integer>, PrimitiveColumnIO> ioByIndexPath = new HashMap<>();
    for (PrimitiveColumnIO columnIO : columnIOs) {
      int[] rawPath = columnIO.getIndexFieldPath();
      List<Integer> boxedPath = new ArrayList<>(rawPath.length);
      for (int fieldIndex : rawPath) {
        boxedPath.add(fieldIndex);
      }
      ioByIndexPath.put(boxedPath, columnIO);
    }

    // Wrap the delegate's root converter; the root itself is given an empty index path.
    GroupConverter delegateRoot = delegate.getRootConverter();
    List<Integer> rootPath = Collections.emptyList();
    this.rootConverter = new FilteringGroupConverter(
        delegateRoot, rootPath, valueInspectorsByColumn, ioByIndexPath);
  }

  /**
   * Evaluates the predicate for the current record, resets it, and then either fetches the
   * record from the delegate or reports a skip.
   *
   * @return the delegate's current record if the predicate evaluated to {@code true};
   *     {@code null} otherwise, which signals that the record is being skipped
   */
  @Override
  public T getCurrentRecord() {
    final boolean keep = IncrementallyUpdatedFilterPredicateEvaluator.evaluate(filterPredicate);

    // The predicate is stateful, so it is reset whether or not the record is kept.
    IncrementallyUpdatedFilterPredicateResetter.reset(filterPredicate);

    if (!keep) {
      // null signals a skip
      return null;
    }
    return delegate.getCurrentRecord();
  }

  /** Forwards the skip notification to the delegate. */
  @Override
  public void skipCurrentRecord() {
    delegate.skipCurrentRecord();
  }

  /**
   * Returns the proxied root converter rather than the delegate's own.
   *
   * @return the proxy built in the constructor
   */
  @Override
  public GroupConverter getRootConverter() {
    return rootConverter;
  }

  // The two static helpers below are kept only for backward compatibility.

  /**
   * Returns the index field path of {@code c} as a list of boxed integers.
   *
   * @param c the column whose index field path is converted
   * @return a new list holding the path elements in order
   */
  @Deprecated
  public static List<Integer> getIndexFieldPathList(PrimitiveColumnIO c) {
    int[] indexFieldPath = c.getIndexFieldPath();
    return intArrayToList(indexFieldPath);
  }

  /**
   * Copies an {@code int} array into a new {@link ArrayList} of boxed integers, preserving order.
   *
   * @param arr the array to copy
   * @return a new list with one element per array entry
   */
  @Deprecated
  public static List<Integer> intArrayToList(int[] arr) {
    return Arrays.stream(arr)
        .boxed()
        .collect(Collectors.toCollection(ArrayList::new));
  }
}