blob: dbdb6236fdb22229a918c739d96f0c05ddc64fc3 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.MutableBitmap;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
import org.apache.druid.segment.nested.GlobalDimensionDictionary;
import org.apache.druid.segment.nested.NestedDataComplexTypeSerde;
import org.apache.druid.segment.nested.NestedLiteralTypeInfo;
import org.apache.druid.segment.nested.StructuredData;
import org.apache.druid.segment.nested.StructuredDataProcessor;
import javax.annotation.Nullable;
import java.util.Objects;
import java.util.SortedMap;
import java.util.TreeMap;
/**
 * {@link DimensionIndexer} for nested data columns whose row values are {@link StructuredData}.
 *
 * The raw row value is kept as-is as the encoded key component. While a row is processed, every literal field
 * reported by the {@link StructuredDataProcessor} is routed to a per-field {@link LiteralFieldIndexer}, which adds
 * the value to the shared {@link GlobalDimensionDictionary} and records the type that was observed for the field.
 *
 * Operations which require sorted dictionary encoded values (sorted indexed values, min/max, dimension selectors,
 * bitmap population) are not supported and throw {@link UnsupportedOperationException}.
 */
public class NestedDataColumnIndexer implements DimensionIndexer<StructuredData, StructuredData, StructuredData>
{
  /**
   * Set to true once a null row value is processed, or when {@link #setSparseIndexed()} is called; reported through
   * {@link #getColumnCapabilities()}.
   */
  protected volatile boolean hasNulls = false;

  /**
   * Indexers for every literal field seen so far, keyed by field name.
   */
  protected SortedMap<String, LiteralFieldIndexer> fieldIndexers = new TreeMap<>();

  /**
   * Dictionary shared by all {@link LiteralFieldIndexer} instances of this column.
   */
  protected final GlobalDimensionDictionary globalDictionary = new GlobalDimensionDictionary();

  /**
   * Running total of the estimated sizes of all field names that have been added to {@link #fieldIndexers}.
   */
  int estimatedFieldKeySize = 0;

  /**
   * Processor which feeds each literal field of a row to the {@link LiteralFieldIndexer} registered for the field
   * name, creating (and accounting for the size of the name of) the indexer the first time a field is seen.
   */
  protected final StructuredDataProcessor indexerProcessor = new StructuredDataProcessor()
  {
    @Override
    public int processLiteralField(String fieldName, Object fieldValue)
    {
      LiteralFieldIndexer fieldIndexer = fieldIndexers.get(fieldName);
      if (fieldIndexer == null) {
        // first time this field is seen: account for the size of its name exactly once
        estimatedFieldKeySize += StructuredDataProcessor.estimateStringSize(fieldName);
        fieldIndexer = new LiteralFieldIndexer(globalDictionary);
        fieldIndexers.put(fieldName, fieldIndexer);
      }
      return fieldIndexer.processValue(fieldValue);
    }
  };

  /**
   * Processes a single row value: wraps it as {@link StructuredData} if it is not one already, runs all of its fields
   * through {@link #indexerProcessor}, and returns the (unmodified) structured value along with an estimate of the
   * number of bytes the row added.
   *
   * @param dimValues             raw row value; null marks the column as having nulls
   * @param reportParseExceptions not consulted by this implementation
   *
   * @return key component holding the structured value (null for a null input) and its estimated size in bytes
   */
  @Override
  public EncodedKeyComponent<StructuredData> processRowValsToUnsortedEncodedKeyComponent(
      @Nullable Object dimValues,
      boolean reportParseExceptions
  )
  {
    // snapshot sizes before processing so that only the growth caused by this row is charged to it
    final long oldDictSizeInBytes = globalDictionary.sizeInBytes();
    final int oldFieldKeySize = estimatedFieldKeySize;
    final StructuredData data;
    if (dimValues == null) {
      hasNulls = true;
      data = null;
    } else if (dimValues instanceof StructuredData) {
      data = (StructuredData) dimValues;
    } else {
      data = new StructuredData(dimValues);
    }
    StructuredDataProcessor.ProcessResults info = indexerProcessor.processFields(data == null ? null : data.getValue());
    // 'raw' data is currently preserved 'as-is', and not replaced with object references to the global dictionaries,
    // so start from the estimated size of the raw data itself
    long effectiveSizeBytes = info.getEstimatedSize();
    // then add how much the global dictionaries grew while processing this row
    effectiveSizeBytes += (globalDictionary.sizeInBytes() - oldDictSizeInBytes);
    // and finally the estimated size of any field names which were seen for the first time
    effectiveSizeBytes += (estimatedFieldKeySize - oldFieldKeySize);
    return new EncodedKeyComponent<>(data, effectiveSizeBytes);
  }

  /**
   * Marks the column as containing nulls.
   */
  @Override
  public void setSparseIndexed()
  {
    this.hasNulls = true;
  }

  /**
   * Returns the value unchanged; this indexer does not keep a separate sorted encoding.
   */
  @Override
  public StructuredData getUnsortedEncodedValueFromSorted(StructuredData sortedIntermediateValue)
  {
    return sortedIntermediateValue;
  }

  /**
   * @throws UnsupportedOperationException always
   */
  @Override
  public CloseableIndexed<StructuredData> getSortedIndexedValues()
  {
    throw new UnsupportedOperationException("Not supported");
  }

  /**
   * @throws UnsupportedOperationException always
   */
  @Override
  public StructuredData getMinValue()
  {
    throw new UnsupportedOperationException("Not supported");
  }

  /**
   * @throws UnsupportedOperationException always
   */
  @Override
  public StructuredData getMaxValue()
  {
    throw new UnsupportedOperationException("Not supported");
  }

  /**
   * @return the cardinality reported by the global dictionary shared by all fields of this column
   */
  @Override
  public int getCardinality()
  {
    return globalDictionary.getCardinality();
  }

  /**
   * @throws UnsupportedOperationException always
   */
  @Override
  public DimensionSelector makeDimensionSelector(
      DimensionSpec spec,
      IncrementalIndexRowHolder currEntry,
      IncrementalIndex.DimensionDesc desc
  )
  {
    throw new UnsupportedOperationException("Not supported");
  }

  /**
   * Creates a selector which reads the {@link StructuredData} stored for this dimension in the row currently held by
   * {@code currEntry}.
   *
   * @param currEntry holder of the row to read from
   * @param desc      descriptor of this dimension; its index locates the value within the row's dims array
   */
  @Override
  public ColumnValueSelector<?> makeColumnValueSelector(
      IncrementalIndexRowHolder currEntry,
      IncrementalIndex.DimensionDesc desc
  )
  {
    final int dimIndex = desc.getIndex();
    return new ObjectColumnSelector<StructuredData>()
    {
      @Override
      public void inspectRuntimeShape(RuntimeShapeInspector inspector)
      {
        // intentionally empty
      }

      @Nullable
      @Override
      public StructuredData getObject()
      {
        return (StructuredData) currEntry.get().getDims()[dimIndex];
      }

      @Override
      public Class<StructuredData> classOfObject()
      {
        return StructuredData.class;
      }
    };
  }

  /**
   * @return default capabilities with the nested data complex type and the current {@link #hasNulls} state
   */
  @Override
  public ColumnCapabilities getColumnCapabilities()
  {
    return ColumnCapabilitiesImpl.createDefault()
                                 .setType(NestedDataComplexTypeSerde.TYPE)
                                 .setHasNulls(hasNulls);
  }

  /**
   * Compares two key components using {@link StructuredData#COMPARATOR}.
   */
  @Override
  public int compareUnsortedEncodedKeyComponents(
      @Nullable StructuredData lhs,
      @Nullable StructuredData rhs
  )
  {
    return StructuredData.COMPARATOR.compare(lhs, rhs);
  }

  /**
   * Null-safe equality check of two key components.
   */
  @Override
  public boolean checkUnsortedEncodedKeyComponentsEqual(
      @Nullable StructuredData lhs,
      @Nullable StructuredData rhs
  )
  {
    return Objects.equals(lhs, rhs);
  }

  /**
   * Null-safe hash code of a key component, consistent with
   * {@link #checkUnsortedEncodedKeyComponentsEqual(StructuredData, StructuredData)}.
   *
   * @return {@code key.hashCode()}, or 0 if {@code key} is null
   */
  @Override
  public int getUnsortedEncodedKeyComponentHashCode(@Nullable StructuredData key)
  {
    // Objects.hashCode instead of the varargs Objects.hash, which would allocate an array for a single value
    return Objects.hashCode(key);
  }

  /**
   * Returns the key unchanged; the structured value is itself the "actual" value.
   */
  @Override
  public Object convertUnsortedEncodedKeyComponentToActualList(StructuredData key)
  {
    return key;
  }

  /**
   * Returns the selector unchanged; values are not re-encoded between unsorted and sorted forms.
   */
  @Override
  public ColumnValueSelector convertUnsortedValuesToSorted(ColumnValueSelector selectorWithUnsortedValues)
  {
    return selectorWithUnsortedValues;
  }

  /**
   * @throws UnsupportedOperationException always
   */
  @Override
  public void fillBitmapsFromUnsortedEncodedKeyComponent(
      StructuredData key,
      int rowNum,
      MutableBitmap[] bitmapIndexes,
      BitmapFactory factory
  )
  {
    throw new UnsupportedOperationException("Not supported");
  }

  /**
   * Tracks a single literal field of the nested column: adds each non-null value to the shared
   * {@link GlobalDimensionDictionary} and records which of LONG, DOUBLE, or STRING the value was handled as.
   */
  static class LiteralFieldIndexer
  {
    private final GlobalDimensionDictionary globalDimensionDictionary;
    private final NestedLiteralTypeInfo.MutableTypeSet typeSet;

    /**
     * @param globalDimensionDictionary dictionary to which all values of this field are added
     */
    LiteralFieldIndexer(GlobalDimensionDictionary globalDimensionDictionary)
    {
      this.globalDimensionDictionary = globalDimensionDictionary;
      this.typeSet = new NestedLiteralTypeInfo.MutableTypeSet();
    }

    /**
     * Adds a value to the global dictionary and records its type.
     *
     * @param value literal field value, possibly null
     *
     * @return the estimated size of the value, or 0 if the value is null
     */
    private int processValue(@Nullable Object value)
    {
      // null value is always added to the global dictionary as id 0, so we can ignore them here
      if (value != null) {
        // coerce the value to its best-effort expression type to decide which typed dictionary it is added to
        ExprEval<?> eval = ExprEval.bestEffortOf(value);
        final ColumnType columnType = ExpressionType.toColumnType(eval.type());
        switch (columnType.getType()) {
          case LONG:
            globalDimensionDictionary.addLongValue(eval.asLong());
            typeSet.add(ColumnType.LONG);
            return StructuredDataProcessor.getLongObjectEstimateSize();
          case DOUBLE:
            globalDimensionDictionary.addDoubleValue(eval.asDouble());
            typeSet.add(ColumnType.DOUBLE);
            return StructuredDataProcessor.getDoubleObjectEstimateSize();
          case STRING:
          default:
            // anything which is not a long or a double is stored by its string form
            final String asString = eval.asString();
            globalDimensionDictionary.addStringValue(asString);
            typeSet.add(ColumnType.STRING);
            return StructuredDataProcessor.estimateStringSize(asString);
        }
      }
      return 0;
    }

    /**
     * @return the set of types recorded for the values of this field so far
     */
    public NestedLiteralTypeInfo.MutableTypeSet getTypes()
    {
      return typeSet;
    }
  }
}