blob: 375496671fa2188705341f1ffa4e41dd4ac9b55e [file] [log] [blame]
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.druid.query.groupby.epinephelinae.column;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.groupby.epinephelinae.DictionaryBuilding;
import org.apache.druid.query.groupby.epinephelinae.Grouper;
import org.apache.druid.query.ordering.StringComparator;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.column.ValueType;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.util.List;
public class ArrayStringGroupByColumnSelectorStrategy implements GroupByColumnSelectorStrategy
// Marker id: used as the "not present" default when looking up intListToInt, and treated by
// initGroupingKeyColumnValue as a missing value.
private static final int GROUP_BY_MISSING_VALUE = -1;
// Contains string <-> id for each element of the multi value grouping column.
// Ids are taken from the map's size at insertion time (see addToIndexedDictionary), so starting from an empty map
// the column value [a,b,c] produces { a <-> 0, b <-> 1, c <-> 2 }.
private final BiMap<String, Integer> dictionaryToInt;
// Stores each distinct row as an integer array whose elements are the ids from dictionaryToInt, mapped to the
// int id that is written into the grouping key.
// E.g. [a,b,c] is converted to [0,1,2]; if it is the first distinct row seen it is assigned the integer value 0:
// [0,1,2] <-> 0
private final BiMap<ComparableIntArray, Integer> intListToInt;
// Running footprint estimate for the two dictionaries above; increased via
// DictionaryBuilding.estimateEntryFootprint each time a new entry is added to either of them.
private long estimatedFootprint = 0L;
public int getGroupingKeySize()
return Integer.BYTES;
/**
 * Creates a strategy whose two dictionaries start out empty, each backed by a fresh {@code HashBiMap}.
 */
public ArrayStringGroupByColumnSelectorStrategy()
{
  this.dictionaryToInt = HashBiMap.create();
  this.intListToInt = HashBiMap.create();
}
// NOTE(review): the header of this second constructor (modifiers, name and opening parenthesis) and its
// delimiters are not present in this listing; only the parameter list and the assignments are visible.
// Restore them from the original file before relying on this block.
//
// The visible lines accept caller-supplied maps and store them directly as the two dictionaries; no copy is made,
// so the caller's maps are the ones this strategy reads and mutates.
BiMap<String, Integer> dictionaryToInt,
BiMap<ComparableIntArray, Integer> intArrayToInt
this.dictionaryToInt = dictionaryToInt;
this.intListToInt = intArrayToInt;
/**
 * Reads the int dictionary id stored at {@code keyBufferPosition} in {@code key} and writes a value into
 * {@code resultRow} at {@code selectorPlus.getResultRowPosition()}: either an {@code Object[]} whose elements are
 * looked up one by one through {@code dictionaryToInt.inverse()}, or {@code null}.
 *
 * NOTE(review): this listing is incomplete. The {@code if} that pairs with the {@code else} below is not visible
 * (presumably a check of {@code id} against GROUP_BY_MISSING_VALUE, going by the comment that follows - confirm),
 * and the statement initialising {@code intRepresentation} is cut off after {@code inverse()}.
 */
public void processValueFromGroupingKey(
GroupByColumnSelectorPlus selectorPlus,
ByteBuffer key,
ResultRow resultRow,
int keyBufferPosition
// The grouping key stores a single int: the id of the row's int-array in intListToInt.
final int id = key.getInt(keyBufferPosition);
// GROUP_BY_MISSING_VALUE is used to indicate empty rows
// NOTE(review): the next statement is truncated; the rest of the lookup chain is missing from this listing.
final int[] intRepresentation = intListToInt.inverse()
final Object[] stringRepresentaion = new Object[intRepresentation.length];
// Translate every element id back to its dictionary value.
for (int i = 0; i < intRepresentation.length; i++) {
stringRepresentaion[i] = dictionaryToInt.inverse().get(intRepresentation[i]);
resultRow.set(selectorPlus.getResultRowPosition(), stringRepresentaion);
} else {
// Other branch: the result row position is set to null instead of an array.
resultRow.set(selectorPlus.getResultRowPosition(), null);
public int initColumnValues(
ColumnValueSelector selector,
int columnIndex,
Object[] valuess
final long priorFootprint = estimatedFootprint;
final int groupingKey = computeDictionaryId(selector);
valuess[columnIndex] = groupingKey;
return (int) (estimatedFootprint - priorFootprint);
public void initGroupingKeyColumnValue(
int keyBufferPosition,
int dimensionIndex,
Object rowObj,
ByteBuffer keyBuffer,
int[] stack
final int groupingKey = (int) rowObj;
writeToKeyBuffer(keyBufferPosition, groupingKey, keyBuffer);
if (groupingKey == GROUP_BY_MISSING_VALUE) {
stack[dimensionIndex] = 0;
} else {
stack[dimensionIndex] = 1;
public boolean checkRowIndexAndAddValueToGroupingKey(
int keyBufferPosition,
Object rowObj,
int rowValIdx,
ByteBuffer keyBuffer
return false;
/**
 * Compute dictionary ID for the given selector. Updates {@link #estimatedFootprint} as necessary.
 */
// Converts the selector's current object into an int array of element ids (adding unseen elements to
// dictionaryToInt through addToIndexedDictionary), then maps that array to a single int id through intListToInt.
// Accepted object shapes in the visible code: String (treated as a one-element array), List, String[] and
// Object[]; List and Object[] elements are cast to String. Any other non-null type throws ISE.
//
// NOTE(review): this listing is incomplete. The body of the `object == null` branch is not visible, and the `if`
// that pairs with the final `else` is missing (presumably a check that dictId is GROUP_BY_MISSING_VALUE, i.e. the
// array has not been seen before - confirm against the original file).
int computeDictionaryId(ColumnValueSelector selector)
final int[] intRepresentation;
Object object = selector.getObject();
if (object == null) {
// NOTE(review): nothing is visible here; intRepresentation is final and unassigned on this path, so the
// original must return or assign in this branch.
} else if (object instanceof String) {
intRepresentation = new int[1];
intRepresentation[0] = addToIndexedDictionary((String) object);
} else if (object instanceof List) {
final int size = ((List<?>) object).size();
intRepresentation = new int[size];
for (int i = 0; i < size; i++) {
intRepresentation[i] = addToIndexedDictionary((String) ((List<?>) object).get(i));
} else if (object instanceof String[]) {
final int size = ((String[]) object).length;
intRepresentation = new int[size];
for (int i = 0; i < size; i++) {
intRepresentation[i] = addToIndexedDictionary(((String[]) object)[i]);
} else if (object instanceof Object[]) {
final int size = ((Object[]) object).length;
intRepresentation = new int[size];
for (int i = 0; i < size; i++) {
intRepresentation[i] = addToIndexedDictionary((String) ((Object[]) object)[i]);
} else {
throw new ISE("Found unexpected object type [%s] in %s array.", object.getClass().getName(), ValueType.STRING);
final ComparableIntArray comparableIntArray = ComparableIntArray.of(intRepresentation);
// GROUP_BY_MISSING_VALUE doubles as the "no id assigned yet" default for this lookup.
final int dictId = intListToInt.getOrDefault(comparableIntArray, GROUP_BY_MISSING_VALUE);
// New arrays get the next id, which is the number of arrays already stored.
final int nextId = intListToInt.keySet().size();
intListToInt.put(comparableIntArray, nextId);
// We're not using the dictionary and reverseDictionary from DictionaryBuilding, but the BiMap is close enough
// that we expect this footprint calculation to still be useful. (It doesn't have to be exact.)
estimatedFootprint +=
DictionaryBuilding.estimateEntryFootprint(comparableIntArray.getDelegate().length * Integer.BYTES);
return nextId;
} else {
return dictId;
private int addToIndexedDictionary(String value)
final Integer dictId = dictionaryToInt.get(value);
if (dictId == null) {
final int nextId = dictionaryToInt.size();
dictionaryToInt.put(value, nextId);
// We're not using the dictionary and reverseDictionary from DictionaryBuilding, but the BiMap is close enough
// that we expect this footprint calculation to still be useful. (It doesn't have to be exact.)
estimatedFootprint +=
DictionaryBuilding.estimateEntryFootprint((value == null ? 0 : value.length()) * Character.BYTES);
return nextId;
} else {
return dictId;
public int writeToKeyBuffer(int keyBufferPosition, ColumnValueSelector selector, ByteBuffer keyBuffer)
final long priorFootprint = estimatedFootprint;
// computeDictionaryId updates estimatedFootprint
keyBuffer.putInt(keyBufferPosition, computeDictionaryId(selector));
return (int) (estimatedFootprint - priorFootprint);
// Builds a comparator over two key buffers. For each side it reads the int id at (position + keyBufferPosition),
// resolves it to its int array through intListToInt.inverse(), and compares the two arrays: the same array
// instance compares as 0; otherwise elements are compared pairwise over the common prefix, and if no pair decides
// the result, equal lengths compare as 0, the shorter array as -1 and the longer as 1.
// A null stringComparator falls back to StringComparators.LEXICOGRAPHIC.
//
// NOTE(review): this listing is incomplete. The right-hand side of `cmp` is missing (presumably it applies
// `comparator` to the dictionary values behind lhs[i] and rhs[i], since `comparator` is otherwise unused in the
// visible lines - confirm), and at least one statement between `if (cmp == 0) {` and `return cmp;` appears to be
// missing as well: as listed, the loop would return on the first equal pair.
// NOTE(review): if a key holds GROUP_BY_MISSING_VALUE, the inverse lookup would presumably yield null and
// getDelegate() would throw - confirm whether such keys can reach this comparator.
public Grouper.BufferComparator bufferComparator(int keyBufferPosition, @Nullable StringComparator stringComparator)
final StringComparator comparator = stringComparator == null ? StringComparators.LEXICOGRAPHIC : stringComparator;
return (lhsBuffer, rhsBuffer, lhsPosition, rhsPosition) -> {
int[] lhs = intListToInt.inverse().get(lhsBuffer.getInt(lhsPosition + keyBufferPosition)).getDelegate();
int[] rhs = intListToInt.inverse().get(rhsBuffer.getInt(rhsPosition + keyBufferPosition)).getDelegate();
int minLength = Math.min(lhs.length, rhs.length);
// Reference equality is intentional: both lookups returning the same array means both keys hold the same id.
//noinspection ArrayEquality
if (lhs == rhs) {
return 0;
} else {
for (int i = 0; i < minLength; i++) {
final int cmp =
if (cmp == 0) {
return cmp;
// Common prefix did not decide the order: fall back to comparing lengths.
if (lhs.length == rhs.length) {
return 0;
} else if (lhs.length < rhs.length) {
return -1;
return 1;
public void reset()
estimatedFootprint = 0;
void writeToKeyBuffer(int keyBufferPosition, int groupingKey, ByteBuffer keyBuffer)
keyBuffer.putInt(keyBufferPosition, groupingKey);