blob: c5f1800acd15c341c3babef086ceb6c5b67a1dee [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.metadata;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Accumulator;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.metadata.metadata.ColumnAnalysis;
import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.Segment;
import org.apache.druid.segment.StorageAdapter;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ComplexColumn;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.serde.ComplexMetricSerde;
import org.apache.druid.segment.serde.ComplexMetrics;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
public class SegmentAnalyzer
{
private static final Logger log = new Logger(SegmentAnalyzer.class);
/**
* This is based on the minimum size of a timestamp (POSIX seconds). An ISO timestamp will actually be more like 24+
*/
private static final int NUM_BYTES_IN_TIMESTAMP = 10;
/**
* This is based on assuming 6 units of precision, one decimal point and a single value left of the decimal
*/
private static final int NUM_BYTES_IN_TEXT_FLOAT = 8;
private final EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes;
public SegmentAnalyzer(EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
{
this.analysisTypes = analysisTypes;
}
public long numRows(Segment segment)
{
return Preconditions.checkNotNull(segment, "segment").asStorageAdapter().getNumRows();
}
public Map<String, ColumnAnalysis> analyze(Segment segment)
{
Preconditions.checkNotNull(segment, "segment");
// index is null for incremental-index-based segments, but storageAdapter is always available
final QueryableIndex index = segment.asQueryableIndex();
final StorageAdapter storageAdapter = segment.asStorageAdapter();
// get length and column names from storageAdapter
final int length = storageAdapter.getNumRows();
final Set<String> columnNames = new HashSet<>();
Iterables.addAll(columnNames, storageAdapter.getAvailableDimensions());
Iterables.addAll(columnNames, storageAdapter.getAvailableMetrics());
Map<String, ColumnAnalysis> columns = new TreeMap<>();
for (String columnName : columnNames) {
final ColumnHolder columnHolder = index == null ? null : index.getColumnHolder(columnName);
final ColumnCapabilities capabilities = columnHolder != null
? columnHolder.getCapabilities()
: storageAdapter.getColumnCapabilities(columnName);
final ColumnAnalysis analysis;
final ValueType type = capabilities.getType();
switch (type) {
case LONG:
analysis = analyzeNumericColumn(capabilities, length, Long.BYTES);
break;
case FLOAT:
analysis = analyzeNumericColumn(capabilities, length, NUM_BYTES_IN_TEXT_FLOAT);
break;
case DOUBLE:
analysis = analyzeNumericColumn(capabilities, length, Double.BYTES);
break;
case STRING:
if (index != null) {
analysis = analyzeStringColumn(capabilities, columnHolder);
} else {
analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
}
break;
case COMPLEX:
analysis = analyzeComplexColumn(capabilities, columnHolder, storageAdapter.getColumnTypeName(columnName));
break;
default:
log.warn("Unknown column type[%s].", type);
analysis = ColumnAnalysis.error(StringUtils.format("unknown_type_%s", type));
}
columns.put(columnName, analysis);
}
// Add time column too
ColumnCapabilities timeCapabilities = storageAdapter.getColumnCapabilities(ColumnHolder.TIME_COLUMN_NAME);
if (timeCapabilities == null) {
timeCapabilities = new ColumnCapabilitiesImpl().setType(ValueType.LONG).setHasMultipleValues(false);
}
columns.put(
ColumnHolder.TIME_COLUMN_NAME,
analyzeNumericColumn(timeCapabilities, length, NUM_BYTES_IN_TIMESTAMP)
);
return columns;
}
public boolean analyzingSize()
{
return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.SIZE);
}
public boolean analyzingCardinality()
{
return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.CARDINALITY);
}
public boolean analyzingMinMax()
{
return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.MINMAX);
}
private ColumnAnalysis analyzeNumericColumn(
final ColumnCapabilities capabilities,
final int length,
final int sizePerRow
)
{
long size = 0;
if (analyzingSize()) {
if (capabilities.hasMultipleValues()) {
return ColumnAnalysis.error("multi_value");
}
size = ((long) length) * sizePerRow;
}
return new ColumnAnalysis(
capabilities.getType().name(),
capabilities.hasMultipleValues(),
size,
null,
null,
null,
null
);
}
private ColumnAnalysis analyzeStringColumn(
final ColumnCapabilities capabilities,
final ColumnHolder columnHolder
)
{
Comparable min = null;
Comparable max = null;
long size = 0;
final int cardinality;
if (capabilities.hasBitmapIndexes()) {
final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();
cardinality = bitmapIndex.getCardinality();
if (analyzingSize()) {
for (int i = 0; i < cardinality; ++i) {
String value = bitmapIndex.getValue(i);
if (value != null) {
size += StringUtils.estimatedBinaryLengthAsUTF8(value) *
((long) bitmapIndex.getBitmap(bitmapIndex.getIndex(value)).size());
}
}
}
if (analyzingMinMax() && cardinality > 0) {
min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0));
max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1));
}
} else if (capabilities.isDictionaryEncoded()) {
// fallback if no bitmap index
DictionaryEncodedColumn<String> theColumn = (DictionaryEncodedColumn<String>) columnHolder.getColumn();
cardinality = theColumn.getCardinality();
if (analyzingMinMax() && cardinality > 0) {
min = NullHandling.nullToEmptyIfNeeded(theColumn.lookupName(0));
max = NullHandling.nullToEmptyIfNeeded(theColumn.lookupName(cardinality - 1));
}
} else {
cardinality = 0;
}
return new ColumnAnalysis(
capabilities.getType().name(),
capabilities.hasMultipleValues(),
size,
analyzingCardinality() ? cardinality : 0,
min,
max,
null
);
}
private ColumnAnalysis analyzeStringColumn(
final ColumnCapabilities capabilities,
final StorageAdapter storageAdapter,
final String columnName
)
{
int cardinality = 0;
long size = 0;
Comparable min = null;
Comparable max = null;
if (analyzingCardinality()) {
cardinality = storageAdapter.getDimensionCardinality(columnName);
}
if (analyzingSize()) {
final DateTime start = storageAdapter.getMinTime();
final DateTime end = storageAdapter.getMaxTime();
final Sequence<Cursor> cursors =
storageAdapter.makeCursors(
null,
new Interval(start, end),
VirtualColumns.EMPTY,
Granularities.ALL,
false,
null
);
size = cursors.accumulate(
0L,
new Accumulator<Long, Cursor>()
{
@Override
public Long accumulate(Long accumulated, Cursor cursor)
{
DimensionSelector selector = cursor
.getColumnSelectorFactory()
.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName));
if (selector == null) {
return accumulated;
}
long current = accumulated;
while (!cursor.isDone()) {
final IndexedInts row = selector.getRow();
for (int i = 0, rowSize = row.size(); i < rowSize; ++i) {
final String dimVal = selector.lookupName(row.get(i));
if (dimVal != null && !dimVal.isEmpty()) {
current += StringUtils.estimatedBinaryLengthAsUTF8(dimVal);
}
}
cursor.advance();
}
return current;
}
}
);
}
if (analyzingMinMax()) {
min = storageAdapter.getMinValue(columnName);
max = storageAdapter.getMaxValue(columnName);
}
return new ColumnAnalysis(
capabilities.getType().name(),
capabilities.hasMultipleValues(),
size,
cardinality,
min,
max,
null
);
}
private ColumnAnalysis analyzeComplexColumn(
@Nullable final ColumnCapabilities capabilities,
@Nullable final ColumnHolder columnHolder,
final String typeName
)
{
try (final ComplexColumn complexColumn = columnHolder != null ? (ComplexColumn) columnHolder.getColumn() : null) {
final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues();
long size = 0;
if (analyzingSize() && complexColumn != null) {
final ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName);
if (serde == null) {
return ColumnAnalysis.error(StringUtils.format("unknown_complex_%s", typeName));
}
final Function<Object, Long> inputSizeFn = serde.inputSizeFn();
if (inputSizeFn == null) {
return new ColumnAnalysis(typeName, hasMultipleValues, 0, null, null, null, null);
}
final int length = complexColumn.getLength();
for (int i = 0; i < length; ++i) {
size += inputSizeFn.apply(complexColumn.getRowValue(i));
}
}
return new ColumnAnalysis(
typeName,
hasMultipleValues,
size,
null,
null,
null,
null
);
}
}
}