blob: cf6a1cadd969cf26ea4f738067eaf3f38e343554 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.internal.filter2.columnindex;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.PrimitiveIterator;
import java.util.Set;
import org.apache.parquet.filter2.compat.FilterCompat.Filter;
import org.apache.parquet.internal.column.columnindex.OffsetIndex;
/**
* Class representing row ranges in a row-group. These row ranges are calculated as a result of the column index based
* filtering. To be used iterate over the matching row indexes to be read from a row-group, retrieve the count of the
* matching rows or check overlapping of a row index range.
*
* @see ColumnIndexFilter#calculateRowRanges(Filter, ColumnIndexStore, Set, long)
*/
public class RowRanges {
private static class Range {
// Returns the union of the two ranges or null if there are elements between them.
private static Range union(Range left, Range right) {
if (left.from <= right.from) {
if (left.to + 1 >= right.from) {
return new Range(left.from, Math.max(left.to, right.to));
}
} else if (right.to + 1 >= left.from) {
return new Range(right.from, Math.max(left.to, right.to));
}
return null;
}
// Returns the intersection of the two ranges of null if they are not overlapped.
private static Range intersection(Range left, Range right) {
if (left.from <= right.from) {
if (left.to >= right.from) {
return new Range(right.from, Math.min(left.to, right.to));
}
} else if (right.to >= left.from) {
return new Range(left.from, Math.min(left.to, right.to));
}
return null;
}
final long from;
final long to;
// Creates a range of [from, to] (from and to are inclusive; empty ranges are not valid)
Range(long from, long to) {
assert from <= to;
this.from = from;
this.to = to;
}
long count() {
return to - from + 1;
}
boolean isBefore(Range other) {
return to < other.from;
}
boolean isAfter(Range other) {
return from > other.to;
}
@Override
public String toString() {
return "[" + from + ", " + to + ']';
}
}
static final RowRanges EMPTY = new RowRanges(Collections.emptyList());
private final List<Range> ranges;
private RowRanges() {
this(new ArrayList<>());
}
private RowRanges(Range range) {
this(Collections.singletonList(range));
}
private RowRanges(List<Range> ranges) {
this.ranges = ranges;
}
/**
* Creates an immutable RowRanges object with the single range [0, rowCount -
* 1].
*
* @param rowCount a single row count
* @return an immutable RowRanges
*/
static RowRanges createSingle(long rowCount) {
return new RowRanges(new Range(0L, rowCount - 1L));
}
/**
* Creates a mutable RowRanges object with the following ranges:
* <pre>
* [firstRowIndex[0], lastRowIndex[0]],
* [firstRowIndex[1], lastRowIndex[1]],
* ...,
* [firstRowIndex[n], lastRowIndex[n]]
* </pre>
* (See OffsetIndex.getFirstRowIndex and OffsetIndex.getLastRowIndex for details.)
*
* The union of the ranges are calculated so the result ranges always contain the disjunct ranges. See union for
* details.
*
* @param rowCount row count
* @param pageIndexes pageIndexes
* @param offsetIndex offsetIndex
* @return a mutable RowRanges
*/
static RowRanges create(long rowCount, PrimitiveIterator.OfInt pageIndexes, OffsetIndex offsetIndex) {
RowRanges ranges = new RowRanges();
while (pageIndexes.hasNext()) {
int pageIndex = pageIndexes.nextInt();
ranges.add(new Range(offsetIndex.getFirstRowIndex(pageIndex), offsetIndex.getLastRowIndex(pageIndex, rowCount)));
}
return ranges;
}
/*
* Calculates the union of the two specified RowRanges object. The union of two range is calculated if there are no
* elements between them. Otherwise, the two disjunct ranges are stored separately.
* For example:
* [113, 241] ∪ [221, 340] = [113, 330]
* [113, 230] ∪ [231, 340] = [113, 340]
* while
* [113, 230] ∪ [232, 340] = [113, 230], [232, 340]
*
* The result RowRanges object will contain all the row indexes that were contained in one of the specified objects.
*/
static RowRanges union(RowRanges left, RowRanges right) {
RowRanges result = new RowRanges();
Iterator<Range> it1 = left.ranges.iterator();
Iterator<Range> it2 = right.ranges.iterator();
if (it2.hasNext()) {
Range range2 = it2.next();
while (it1.hasNext()) {
Range range1 = it1.next();
if (range1.isAfter(range2)) {
result.add(range2);
range2 = range1;
Iterator<Range> tmp = it1;
it1 = it2;
it2 = tmp;
} else {
result.add(range1);
}
}
result.add(range2);
} else {
it2 = it1;
}
while (it2.hasNext()) {
result.add(it2.next());
}
return result;
}
/*
* Calculates the intersection of the two specified RowRanges object. Two ranges intersect if they have common
* elements otherwise the result is empty.
* For example:
* [113, 241] ∩ [221, 340] = [221, 241]
* while
* [113, 230] ∩ [231, 340] = <EMPTY>
*
* The result RowRanges object will contain all the row indexes there were contained in both of the specified objects
*/
static RowRanges intersection(RowRanges left, RowRanges right) {
RowRanges result = new RowRanges();
int rightIndex = 0;
for (Range l : left.ranges) {
for (int i = rightIndex, n = right.ranges.size(); i < n; ++i) {
Range r = right.ranges.get(i);
if (l.isBefore(r)) {
break;
} else if (l.isAfter(r)) {
rightIndex = i + 1;
continue;
}
result.add(Range.intersection(l, r));
}
}
return result;
}
/*
* Adds a range to the end of the list of ranges. It maintains the disjunct ascending order(*) of the ranges by
* trying to union the specified range to the last ranges in the list. The specified range shall be larger(*) than
* the last one or might be overlapped with some of the last ones.
* (*) [a, b] < [c, d] if b < c
*/
private void add(Range range) {
Range rangeToAdd = range;
for (int i = ranges.size() - 1; i >= 0; --i) {
Range last = ranges.get(i);
assert !last.isAfter(range);
Range u = Range.union(last, rangeToAdd);
if (u == null) {
break;
}
rangeToAdd = u;
ranges.remove(i);
}
ranges.add(rangeToAdd);
}
/**
* @return the number of rows in the ranges
*/
public long rowCount() {
long cnt = 0;
for (Range range : ranges) {
cnt += range.count();
}
return cnt;
}
/**
* @return the ascending iterator of the row indexes contained in the ranges
*/
public PrimitiveIterator.OfLong iterator() {
return new PrimitiveIterator.OfLong() {
private int currentRangeIndex = -1;
private Range currentRange;
private long next = findNext();
private long findNext() {
if (currentRange == null || next + 1 > currentRange.to) {
if (currentRangeIndex + 1 < ranges.size()) {
currentRange = ranges.get(++currentRangeIndex);
next = currentRange.from;
} else {
return -1;
}
} else {
++next;
}
return next;
}
@Override
public boolean hasNext() {
return next >= 0;
}
@Override
public long nextLong() {
long ret = next;
if (ret < 0) {
throw new NoSuchElementException();
}
next = findNext();
return ret;
}
};
}
/**
* @param from
* the first row of the range to be checked for connection
* @param to
* the last row of the range to be checked for connection
* @return {@code true} if the specified range is overlapping (have common elements) with one of the ranges
*/
public boolean isOverlapping(long from, long to) {
return Collections.binarySearch(ranges, new Range(from, to),
(r1, r2) -> r1.isBefore(r2) ? -1 : r1.isAfter(r2) ? 1 : 0) >= 0;
}
@Override
public String toString() {
return ranges.toString();
}
}