/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
 */

package org.apache.druid.benchmark.query;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.guice.NestedDataModule;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.DruidProcessingConfig;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.column.StringEncodingStrategy;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthTestUtils;
import org.apache.druid.sql.calcite.SqlVectorizedExpressionSanityTest;
import org.apache.druid.sql.calcite.planner.CalciteRulesManager;
import org.apache.druid.sql.calcite.planner.CatalogResolver;
import org.apache.druid.sql.calcite.planner.DruidPlanner;
import org.apache.druid.sql.calcite.planner.PlannerConfig;
import org.apache.druid.sql.calcite.planner.PlannerFactory;
import org.apache.druid.sql.calcite.planner.PlannerResult;
import org.apache.druid.sql.calcite.run.SqlEngine;
import org.apache.druid.sql.calcite.schema.DruidSchemaCatalog;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

import javax.annotation.Nullable;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
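
/**
 * Benchmark for simple SQL {@code SELECT <dim>, COUNT(*) FROM foo GROUP BY 1} queries,
 * parameterized by the type (string, multi-string, long, double, float, array, nested JSON),
 * value distribution, and cardinality of the grouping column.
 */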
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
public class SqlGroupByBenchmark
{
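  // One-time test-framework initialization; must run before any segments are generated or queried.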
static {
NullHandling.initializeForTests();
ExpressionProcessing.initializeForTests();
NestedDataModule.registerHandlersAndSerde();
}
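
  // Single-threaded processing pool: a 512 MiB intermediate buffer, three merge buffers, and one
  // processing thread.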
  private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig()
  {
    @Override
    public int intermediateComputeSizeBytes()
    {
      return 512 * 1024 * 1024;
    }

    @Override
    public int getNumMergeBuffers()
    {
      return 3;
    }

    @Override
    public int getNumThreads()
    {
      return 1;
    }

    @Override
    public String getFormatString()
    {
      return "benchmarks-processing-%s";
    }
  };
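
  // Grouping-column parameters follow the pattern <type>-<distribution>-<cardinality>; the
  // cardinality suffix is re-parsed in setup() as the number of rows to generate per segment.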
@Param({
"string-Sequential-100_000",
"string-Sequential-10_000_000",
// "string-Sequential-1_000_000_000",
"string-ZipF-1_000_000",
"string-Uniform-1_000_000",
"multi-string-Sequential-100_000",
"multi-string-Sequential-10_000_000",
// "multi-string-Sequential-1_000_000_000",
"multi-string-ZipF-1_000_000",
"multi-string-Uniform-1_000_000",
"long-Sequential-100_000",
"long-Sequential-10_000_000",
// "long-Sequential-1_000_000_000",
"long-ZipF-1_000_000",
"long-Uniform-1_000_000",
"double-ZipF-1_000_000",
"double-Uniform-1_000_000",
"float-ZipF-1_000_000",
"float-Uniform-1_000_000",
"stringArray-Sequential-100_000",
"stringArray-Sequential-3_000_000",
// "stringArray-Sequential-1_000_000_000",
"stringArray-ZipF-1_000_000",
"stringArray-Uniform-1_000_000",
"longArray-Sequential-100_000",
"longArray-Sequential-3_000_000",
// "longArray-Sequential-1_000_000_000",
"longArray-ZipF-1_000_000",
"longArray-Uniform-1_000_000",
"nested-Sequential-100_000",
"nested-Sequential-3_000_000",
// "nested-Sequential-1_000_000_000",
"nested-ZipF-1_000_000",
"nested-Uniform-1_000_000",
})
  private String groupingDimension;

  private SqlEngine engine;

  @Nullable
  private PlannerFactory plannerFactory;

  private final Closer closer = Closer.create();
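
  // Generates a segment sized to the grouping dimension's cardinality, registers it twice with a
  // segment walker, builds the SQL planner, and runs the query once as a sanity check.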
@Setup(Level.Trial)
public void setup()
{
final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("groupBy-testbench");
final DataSegment dataSegment = DataSegment.builder()
.dataSource("foo")
.interval(schemaInfo.getDataInterval())
.version("1")
.shardSpec(new LinearShardSpec(0))
.size(0)
.build();
final DataSegment dataSegment2 = DataSegment.builder()
.dataSource("foo")
.interval(schemaInfo.getDataInterval())
.version("1")
.shardSpec(new LinearShardSpec(1))
.size(0)
.build();
final PlannerConfig plannerConfig = new PlannerConfig();
String columnCardinalityWithUnderscores = groupingDimension.substring(groupingDimension.lastIndexOf('-') + 1);
int rowsPerSegment = Integer.parseInt(StringUtils.replace(columnCardinalityWithUnderscores, "_", ""));
final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
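    // Derive the array-typed and nested-JSON grouping columns from the scalar generator columns.
    // Note that the "-3_000_000" variants wrap 10M-cardinality sequential columns: since the name
    // suffix also sets rowsPerSegment, at most ~3M distinct values actually appear.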
TransformSpec transformSpec = new TransformSpec(
null,
ImmutableList.of(
// string array dims
new ExpressionTransform(
"stringArray-Sequential-100_000",
"array(\"string-Sequential-100_000\")",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"stringArray-Sequential-3_000_000",
"array(\"string-Sequential-10_000_000\")",
TestExprMacroTable.INSTANCE
),
/*
new ExpressionTransform(
"stringArray-Sequential-1_000_000_000",
"array(\"string-Sequential-1_000_000_000\")",
TestExprMacroTable.INSTANCE
),*/
new ExpressionTransform(
"stringArray-ZipF-1_000_000",
"array(\"string-ZipF-1_000_000\")",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"stringArray-Uniform-1_000_000",
"array(\"string-Uniform-1_000_000\")",
TestExprMacroTable.INSTANCE
),
// long array dims
new ExpressionTransform(
"longArray-Sequential-100_000",
"array(\"long-Sequential-100_000\")",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"longArray-Sequential-3_000_000",
"array(\"long-Sequential-10_000_000\")",
TestExprMacroTable.INSTANCE
),
/*
new ExpressionTransform(
"longArray-Sequential-1_000_000_000",
"array(\"long-Sequential-1_000_000_000\")",
TestExprMacroTable.INSTANCE
),*/
new ExpressionTransform(
"longArray-ZipF-1_000_000",
"array(\"long-ZipF-1_000_000\")",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"longArray-Uniform-1_000_000",
"array(\"long-Uniform-1_000_000\")",
TestExprMacroTable.INSTANCE
),
// nested complex json dim
new ExpressionTransform(
"nested-Sequential-100_000",
"json_object('long1', \"long-Sequential-100_000\", 'nesteder', json_object('long1', \"long-Sequential-100_000\"))",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"nested-Sequential-3_000_000",
"json_object('long1', \"long-Sequential-10_000_000\", 'nesteder', json_object('long1', \"long-Sequential-10_000_000\"))",
TestExprMacroTable.INSTANCE
),
/*new ExpressionTransform(
"nested-Sequential-1_000_000_000",
"json_object('long1', \"long-Sequential-1_000_000_000\", 'nesteder', json_object('long1', \"long-Sequential-1_000_000_000\"))",
TestExprMacroTable.INSTANCE
),*/
new ExpressionTransform(
"nested-ZipF-1_000_000",
"json_object('long1', \"long-ZipF-1_000_000\", 'nesteder', json_object('long1', \"long-ZipF-1_000_000\"))",
TestExprMacroTable.INSTANCE
),
new ExpressionTransform(
"nested-Uniform-1_000_000",
"json_object('long1', \"long-Uniform-1_000_000\", 'nesteder', json_object('long1', \"long-Uniform-1_000_000\"))",
TestExprMacroTable.INSTANCE
)
)
);
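
    // Register every generator column and every transform output as an auto-typed dimension so the
    // grouping columns keep their native types (strings, longs, arrays, nested JSON) in the segment.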
List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
.getDimensions()
.stream()
.map(x -> new AutoTypeColumnSchema(x.getName(), null))
.collect(Collectors.toList());
List<DimensionSchema> transformSchemas = transformSpec
.getTransforms()
.stream()
.map(
transform -> new AutoTypeColumnSchema(transform.getName(), null)
)
.collect(Collectors.toList());
final QueryableIndex index = segmentGenerator.generate(
dataSegment,
schemaInfo,
DimensionsSpec.builder()
.setDimensions(ImmutableList.<DimensionSchema>builder()
.addAll(columnSchemas)
.addAll(transformSchemas)
.build()
)
.build(),
transformSpec,
IndexSpec.builder().withStringDictionaryEncoding(new StringEncodingStrategy.Utf8()).build(),
Granularities.NONE,
rowsPerSegment
);
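
    // Assemble the query stack: a runner conglomerate over the processing config, plus a segment
    // walker that serves the same generated index as two segments of datasource "foo".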
final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(
closer,
PROCESSING_CONFIG
);
final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate)
.add(dataSegment, index)
.add(dataSegment2, index);
closer.register(walker);
// Hacky and pollutes global namespace, but it is fine since benchmarks are run in isolation. Wasn't able
// to work up a cleaner way of doing it by modifying the injector.
CalciteTests.getJsonMapper().registerModules(NestedDataModule.getJacksonModulesList());
final DruidSchemaCatalog rootSchema =
CalciteTests.createMockRootSchema(conglomerate, walker, plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER);
engine = CalciteTests.createMockSqlEngine(walker, conglomerate);
plannerFactory = new PlannerFactory(
rootSchema,
CalciteTests.createOperatorTable(),
CalciteTests.createExprMacroTable(),
plannerConfig,
AuthTestUtils.TEST_AUTHORIZER_MAPPER,
CalciteTests.getJsonMapper(),
CalciteTests.DRUID_SCHEMA_NAME,
new CalciteRulesManager(ImmutableSet.of()),
CalciteTests.createJoinableFactoryWrapper(),
CatalogResolver.NULL_RESOLVER,
new AuthConfig()
);
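
    // Plan and run the query once through the vectorized-expression sanity check so that planning
    // or vectorization problems surface during setup; failures are deliberately swallowed so the
    // benchmark itself can still run.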
try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
plannerFactory,
sqlQuery(groupingDimension)
);
}
catch (Throwable ignored) {
// the show must go on
}
  }

  @TearDown(Level.Trial)
public void tearDown() throws Exception
{
closer.close();
}
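
  // Measured path: plan and fully consume the GROUP BY query, keeping only the last row and
  // handing it to the Blackhole to defeat dead-code elimination.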
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void querySql(Blackhole blackhole)
{
final String sql = sqlQuery(groupingDimension);
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, Collections.emptyMap())) {
final PlannerResult plannerResult = planner.plan();
final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
final Object[] lastRow = resultSequence.accumulate(null, (accumulated, in) -> in);
blackhole.consume(lastRow);
}
}
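
  // Builds the benchmark query: group the entire datasource on the parameterized dimension and
  // count the rows in each group.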
private static String sqlQuery(String groupingDimension)
{
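    // e.g. for groupingDimension = "long-ZipF-1_000_000" this renders:
    //   SELECT "long-ZipF-1_000_000", COUNT(*) FROM foo GROUP BY 1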
return StringUtils.format("SELECT \"%s\", COUNT(*) FROM foo GROUP BY 1", groupingDimension);
}
}