blob: f6922ed1991012496e28e57687fff149f348c201 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.engine.planner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.tajo.catalog.Column;
import org.apache.tajo.catalog.SortSpec;
import org.apache.tajo.datum.Datum;
import org.apache.tajo.datum.DatumFactory;
import org.apache.tajo.datum.NullDatum;
import org.apache.tajo.engine.exception.RangeOverflowException;
import org.apache.tajo.storage.Tuple;
import org.apache.tajo.storage.TupleRange;
import org.apache.tajo.storage.VTuple;
import org.apache.tajo.util.Bytes;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.List;
public class UniformRangePartition extends RangePartitionAlgorithm {
// Index of the sort column selected by the most recent partition() call: the first column
// whose cumulative cardinality (cardForEachDigit) is at least the requested partition count.
private int variableId;
// cardForEachDigit[i] holds the product colCards[0] * ... * colCards[i]; filled in by the constructor.
private BigDecimal[] cardForEachDigit;
// colCards[i] holds the cardinality of sort column i over the total range, as returned by
// computeCardinality() in the constructor.
private BigDecimal[] colCards;
/**
*
* @param totalRange
* @param sortSpecs The description of sort keys
* @param inclusive true if the end of the range is inclusive
*/
public UniformRangePartition(TupleRange totalRange, SortSpec[] sortSpecs, boolean inclusive) {
super(sortSpecs, totalRange, inclusive);
colCards = new BigDecimal[sortSpecs.length];
for (int i = 0; i < sortSpecs.length; i++) {
colCards[i] = computeCardinality(sortSpecs[i].getSortKey().getDataType(), totalRange.getStart().get(i),
totalRange.getEnd().get(i), inclusive, sortSpecs[i].isAscending());
}
cardForEachDigit = new BigDecimal[colCards.length];
for (int i = 0; i < colCards.length ; i++) {
if (i == 0) {
cardForEachDigit[i] = colCards[i];
} else {
cardForEachDigit[i] = cardForEachDigit[i - 1].multiply(colCards[i]);
}
}
}
/**
 * Creates a partitioner by delegating to the three-argument constructor with
 * <code>inclusive</code> set to <code>true</code>.
 *
 * @param range The whole range to be partitioned
 * @param sortSpecs The description of sort keys
 */
public UniformRangePartition(TupleRange range, SortSpec [] sortSpecs) {
this(range, sortSpecs, true);
}
/**
 * Splits the total range into consecutive sub-ranges of (at most) equal step size.
 *
 * <p>The step is the ceiling of (key space / <code>partNum</code>), where the key space is the
 * product of the cardinalities of the columns up to the first column whose cumulative
 * cardinality reaches <code>partNum</code>. Ranges are emitted one step at a time until the key
 * space is used up; the last one always ends at the end of the total range.
 *
 * @param partNum The requested number of partitions; must be positive and must not exceed the
 *                total cardinality
 * @return The generated ranges, in order
 */
@Override
public TupleRange[] partition(int partNum) {
  Preconditions.checkArgument(partNum > 0,
      "The number of partitions must be positive, but the given number: "
          + partNum);
  final BigDecimal wanted = new BigDecimal(partNum);
  Preconditions.checkArgument(totalCard.compareTo(wanted) >= 0,
      "the number of partition cannot exceed total cardinality (" + totalCard + ")");

  // Pick the first column whose cumulative cardinality can hold the requested partitions.
  int pivot = 0;
  while (pivot < cardForEachDigit.length && cardForEachDigit[pivot].compareTo(wanted) < 0) {
    pivot++;
  }
  this.variableId = pivot;

  // Size of the key space spanned by columns 0..variableId.
  BigDecimal keySpace = colCards[variableId];
  for (int col = variableId - 1; col >= 0; col--) {
    keySpace = keySpace.multiply(colCards[col]);
  }

  final BigDecimal term = keySpace.divide(wanted, RoundingMode.CEILING);
  final List<TupleRange> ranges = Lists.newArrayList();

  BigDecimal remaining = keySpace;
  Tuple cursor = range.getStart();
  while (remaining.signum() > 0) {
    final TupleRange piece;
    if (remaining.compareTo(term) <= 0) { // final one is inclusive
      piece = new TupleRange(sortSpecs, cursor, range.getEnd());
    } else {
      piece = new TupleRange(sortSpecs, cursor, increment(cursor, term.longValue(), variableId));
    }
    ranges.add(piece);
    cursor = piece.getEnd();
    remaining = remaining.subtract(term);
  }

  return ranges.toArray(new TupleRange[ranges.size()]);
}
/**
 * Checks whether moving <code>last</code> by <code>inc</code> would take the given sort column
 * past the end of the total range.
 *
 * <p>For an ascending column the candidate is <code>last + inc</code> and an overflow is
 * reported when the range end is smaller than the candidate. For a descending column the
 * candidate is <code>last - inc</code> and an overflow is reported when the candidate is
 * smaller than the range end. TEXT columns are compared on their first character only.
 * INET4 addresses are handled as unsigned 32-bit numbers, so a candidate that leaves
 * [0, 2^32 - 1] is always an overflow.
 *
 * @param colId The column id to be checked
 * @param last The current value of the column
 * @param inc The amount by which the column is going to be moved
 * @param sortSpecs The description of sort keys; supplies the column type and sort direction
 * @return true if the candidate value lies beyond the end of the range; false otherwise, and
 *         also false for column types that are not handled here
 */
public boolean isOverflow(int colId, Datum last, BigDecimal inc, SortSpec [] sortSpecs) {
  final Column column = sortSpecs[colId].getSortKey();
  final boolean ascending = sortSpecs[colId].isAscending();
  final Datum end = range.getEnd().get(colId);

  switch (column.getDataType().getType()) {
    case BIT:
      return crossesRangeEnd(new BigDecimal(last.asByte()), new BigDecimal(end.asByte()), inc, ascending);

    case CHAR:
      return crossesRangeEnd(new BigDecimal((int) last.asChar()), new BigDecimal((int) end.asChar()),
          inc, ascending);

    case INT2:
      return crossesRangeEnd(new BigDecimal(last.asInt2()), new BigDecimal(end.asInt2()), inc, ascending);

    case DATE:
    case INT4:
      return crossesRangeEnd(new BigDecimal(last.asInt4()), new BigDecimal(end.asInt4()), inc, ascending);

    case TIME:
    case TIMESTAMP:
    case INT8:
      return crossesRangeEnd(new BigDecimal(last.asInt8()), new BigDecimal(end.asInt8()), inc, ascending);

    case FLOAT4:
      return crossesRangeEnd(new BigDecimal(last.asFloat4()), new BigDecimal(end.asFloat4()), inc, ascending);

    case FLOAT8:
      return crossesRangeEnd(new BigDecimal(last.asFloat8()), new BigDecimal(end.asFloat8()), inc, ascending);

    case TEXT: {
      // Only the first character takes part in the comparison. A NullDatum is mapped to '0'
      // in the ascending case only.
      // NOTE(review): the descending case has never special-cased NullDatum; confirm whether
      // it should mirror the ascending case.
      final char lastFirst = (ascending && last instanceof NullDatum) ? '0' : last.asChars().charAt(0);
      final char endFirst = end.asChars().charAt(0);
      return crossesRangeEnd(new BigDecimal((int) lastFirst), new BigDecimal((int) endFirst), inc, ascending);
    }

    case INET4: {
      // Do the arithmetic on the unsigned 32-bit address in BigDecimal. With plain int
      // arithmetic a wrap-around cannot be detected: (a + b) - b == a always holds because
      // int addition is modular, and inc.intValue() would silently truncate large increments.
      final BigDecimal maxAddress = BigDecimal.valueOf(0xFFFFFFFFL);
      final BigDecimal lastAddress = BigDecimal.valueOf(last.asInt4() & 0xFFFFFFFFL);
      final BigDecimal candidate = ascending ? lastAddress.add(inc) : lastAddress.subtract(inc);
      if (candidate.signum() < 0 || candidate.compareTo(maxAddress) > 0) {
        return true; // left the representable address space
      }

      final byte[] candidateBytesVal = new byte[4];
      Bytes.putInt(candidateBytesVal, 0, (int) candidate.longValue());
      final byte[] endBytesVal = end.asByteArray();
      return ascending
          ? Bytes.compareTo(endBytesVal, candidateBytesVal) < 0
          : Bytes.compareTo(candidateBytesVal, endBytesVal) < 0;
    }

    default:
      return false;
  }
}

/** Returns true if moving <code>lastValue</code> by <code>inc</code> in sort direction passes <code>endValue</code>. */
private static boolean crossesRangeEnd(BigDecimal lastValue, BigDecimal endValue, BigDecimal inc,
                                       boolean ascending) {
  if (ascending) {
    return endValue.compareTo(inc.add(lastValue)) < 0;
  }
  return lastValue.subtract(inc).compareTo(endValue) < 0;
}
/**
 * Computes how far <code>last + inc</code> overshoots the end of the range for the given
 * column, minus one.
 *
 * <p>{@link #increment} calls this for a column that overflowed and adds the returned value to
 * the start of the range for that column, so the result is the zero-based offset from the
 * range start at which the wrapped column continues.
 *
 * <p>NOTE(review): the candidate is always computed as <code>last + inc</code>; the sort
 * direction of the column is not consulted here. Column types without a case below yield -1.
 *
 * @param colId The column id
 * @param last The current value of the column
 * @param inc The amount added to the column
 * @return (<code>last + inc</code> - end of range) - 1
 */
public long incrementAndGetReminder(int colId, Datum last, long inc) {
  Column column = sortSpecs[colId].getSortKey();
  long reminder = 0;
  switch (column.getDataType().getType()) {
    case BIT: {
      long candidate = last.asByte() + inc;
      byte end = range.getEnd().get(colId).asByte();
      reminder = candidate - end;
      break;
    }
    case CHAR: {
      long candidate = last.asChar() + inc;
      char end = range.getEnd().get(colId).asChar();
      reminder = candidate - end;
      break;
    }
    // INT2 is accepted by isOverflow() and increment(), so it needs a remainder as well.
    case INT2: {
      long candidate = last.asInt2() + inc;
      short end = range.getEnd().get(colId).asInt2();
      reminder = candidate - end;
      break;
    }
    case DATE:
    case INT4: {
      // Stay in long arithmetic: narrowing the sum to int, or subtracting two ints, can wrap
      // around when the values are close to the int limits.
      long candidate = last.asInt4() + inc;
      int end = range.getEnd().get(colId).asInt4();
      reminder = candidate - end;
      break;
    }
    case TIME:
    case TIMESTAMP:
    case INT8:
    case INET4: {
      long candidate = last.asInt8() + inc;
      long end = range.getEnd().get(colId).asInt8();
      reminder = candidate - end;
      break;
    }
    case FLOAT4: {
      float candidate = last.asFloat4() + inc;
      float end = range.getEnd().get(colId).asFloat4();
      reminder = (long) (candidate - end);
      break;
    }
    case FLOAT8: {
      double candidate = last.asFloat8() + inc;
      double end = range.getEnd().get(colId).asFloat8();
      reminder = (long) Math.ceil(candidate - end);
      break;
    }
    case TEXT: {
      // Only the first character of the text takes part in the computation.
      char candidate = ((char)(last.asChars().charAt(0) + inc));
      char end = range.getEnd().get(colId).asChars().charAt(0);
      reminder = (char) (candidate - end);
      break;
    }
  }
  // including zero
  return reminder - 1;
}
/**
 * Produces the tuple that lies <code>inc</code> steps after <code>last</code>, treating the
 * sort columns 0..<code>baseDigit</code> as the digits of a mixed-radix number whose radices
 * are the column cardinalities.
 *
 * <p>The increment is first broken down into one increment per column, then overflows are
 * carried from column <code>baseDigit</code> towards column 0, and finally a new tuple is
 * built. A column that overflowed restarts from the start of the range; any other column
 * continues from its value in <code>last</code>.
 *
 * @param last The tuple to start from
 * @param inc The number of steps to advance
 * @param baseDigit The index of the least significant column taking part in the increment
 * @return A new tuple holding one value for every sort column
 * @throws RangeOverflowException if column 0 would move past the end of the range
 * @throws UnsupportedOperationException if a sort column has a type that is not handled
 */
public Tuple increment(final Tuple last, final long inc, final int baseDigit) {
  final int width = last.size();
  final BigDecimal[] incs = new BigDecimal[width];
  final boolean[] overflowFlag = new boolean[width];

  // reverseCards[d] = colCards[d] * ... * colCards[baseDigit]
  final BigDecimal[] reverseCards = new BigDecimal[baseDigit + 1];
  for (int digit = baseDigit; digit >= 0; digit--) {
    reverseCards[digit] = (digit == baseDigit)
        ? colCards[digit]
        : reverseCards[digit + 1].multiply(colCards[digit]);
  }

  // Break the increment down into one increment per column, most significant column first.
  BigDecimal value = BigDecimal.valueOf(inc);
  for (int digit = 0; digit < baseDigit; digit++) {
    final BigDecimal[] quotientAndRest = value.divideAndRemainder(reverseCards[digit + 1]);
    incs[digit] = quotientAndRest[0];
    value = quotientAndRest[1];
  }
  incs[baseDigit] = value;

  // Carry overflows from the least significant column towards column 0.
  for (int digit = baseDigit; digit >= 0; digit--) {
    if (!isOverflow(digit, last.get(digit), incs[digit], sortSpecs)) {
      if (digit > 0) {
        incs[digit] = value;
        break;
      }
      continue;
    }

    if (digit == 0) {
      throw new RangeOverflowException(range, last, incs[digit].longValue());
    }
    final long rem = incrementAndGetReminder(digit, last.get(digit), value.longValue());
    incs[digit] = new BigDecimal(rem);
    incs[digit - 1] = incs[digit - 1].add(BigDecimal.ONE);
    overflowFlag[digit] = true;
  }

  // Columns beyond baseDigit received no increment.
  for (int digit = 0; digit < incs.length; digit++) {
    if (incs[digit] == null) {
      incs[digit] = BigDecimal.ZERO;
    }
  }

  final Tuple end = new VTuple(sortSpecs.length);
  for (int col = 0; col < last.size(); col++) {
    final Column column = sortSpecs[col].getSortKey();
    final boolean wrapped = overflowFlag[col];
    // A wrapped column restarts from the start of the range, any other one continues from last.
    final Datum base = wrapped ? range.getStart().get(col) : last.get(col);
    final long delta = incs[col].longValue();

    switch (column.getDataType().getType()) {
      case CHAR:
        end.put(col, DatumFactory.createChar((char) (base.asChar() + delta)));
        break;

      case BIT:
        end.put(col, DatumFactory.createBit((byte) (base.asByte() + delta)));
        break;

      case INT2:
        end.put(col, DatumFactory.createInt2((short) (base.asInt2() + delta)));
        break;

      case INT4:
        // A column that did not wrap moves against the increment when sorted in descending order.
        if (wrapped || sortSpecs[col].isAscending()) {
          end.put(col, DatumFactory.createInt4((int) (base.asInt4() + delta)));
        } else {
          end.put(col, DatumFactory.createInt4((int) (base.asInt4() - delta)));
        }
        break;

      case INT8:
        end.put(col, DatumFactory.createInt8(base.asInt8() + delta));
        break;

      case FLOAT4:
        end.put(col, DatumFactory.createFloat4(base.asFloat4() + delta));
        break;

      case FLOAT8:
        end.put(col, DatumFactory.createFloat8(base.asFloat8() + delta));
        break;

      case TEXT: {
        // Only the first character is advanced; a NullDatum taken from last counts as '0'.
        final char first;
        if (wrapped) {
          first = base.asChars().charAt(0);
        } else {
          first = (base instanceof NullDatum) ? '0' : base.asChars().charAt(0);
        }
        end.put(col, DatumFactory.createText(String.valueOf((char) (first + delta))));
        break;
      }

      case DATE:
        end.put(col, DatumFactory.createDate((int) (base.asInt4() + delta)));
        break;

      case TIME:
        end.put(col, DatumFactory.createTime(base.asInt8() + delta));
        break;

      case TIMESTAMP:
        end.put(col, DatumFactory.createTimeStampFromMillis(base.asInt8() + delta));
        break;

      case INET4: {
        final byte[] ipBytes;
        if (wrapped) {
          // The wrapped address is the start of the range as is; the increment is not applied.
          ipBytes = base.asByteArray();
          assert ipBytes.length == 4;
        } else {
          final int lastVal = base.asInt4() + incs[col].intValue();
          ipBytes = new byte[4];
          Bytes.putInt(ipBytes, 0, lastVal);
        }
        end.put(col, DatumFactory.createInet4(ipBytes));
        break;
      }

      default:
        throw new UnsupportedOperationException(column.getDataType() + " is not supported yet");
    }
  }

  return end;
}
}