blob: 7e6e8207c27fdcca2f9d784d2f16f40add6f056b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.test;
import org.apache.calcite.jdbc.CalciteSchema;
import org.apache.calcite.materialize.Lattice;
import org.apache.calcite.materialize.Lattices;
import org.apache.calcite.materialize.MaterializationService;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.rules.materialize.MaterializedViewRules;
import org.apache.calcite.runtime.Hook;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.test.schemata.foodmart.FoodmartSchema;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.TestUtil;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import static org.apache.calcite.test.Matchers.containsStringLinux;
import static org.apache.calcite.test.Matchers.within;
import static org.hamcrest.CoreMatchers.anyOf;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.Is.is;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assumptions.assumeTrue;
/**
* Unit test for lattices.
*/
@Tag("slow")
class LatticeTest {
private static final String SALES_LATTICE = "{\n"
+ " name: 'star',\n"
+ " sql: [\n"
+ " 'select 1 from \"foodmart\".\"sales_fact_1997\" as \"s\"',\n"
+ " 'join \"foodmart\".\"product\" as \"p\" using (\"product_id\")',\n"
+ " 'join \"foodmart\".\"time_by_day\" as \"t\" using (\"time_id\")',\n"
+ " 'join \"foodmart\".\"product_class\" as \"pc\" on \"p\".\"product_class_id\" = \"pc\".\"product_class_id\"'\n"
+ " ],\n"
+ " auto: false,\n"
+ " algorithm: true,\n"
+ " algorithmMaxMillis: 10000,\n"
+ " rowCountEstimate: 86837,\n"
+ " defaultMeasures: [ {\n"
+ " agg: 'count'\n"
+ " } ],\n"
+ " tiles: [ {\n"
+ " dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
+ " measures: [ {\n"
+ " agg: 'sum',\n"
+ " args: 'unit_sales'\n"
+ " }, {\n"
+ " agg: 'sum',\n"
+ " args: 'store_sales'\n"
+ " }, {\n"
+ " agg: 'count'\n"
+ " } ]\n"
+ " } ]\n"
+ "}\n";
private static final String INVENTORY_LATTICE = "{\n"
+ " name: 'warehouse',\n"
+ " sql: [\n"
+ " 'select 1 from \"foodmart\".\"inventory_fact_1997\" as \"s\"',\n"
+ " 'join \"foodmart\".\"product\" as \"p\" using (\"product_id\")',\n"
+ " 'join \"foodmart\".\"time_by_day\" as \"t\" using (\"time_id\")',\n"
+ " 'join \"foodmart\".\"warehouse\" as \"w\" using (\"warehouse_id\")'\n"
+ " ],\n"
+ " auto: false,\n"
+ " algorithm: true,\n"
+ " algorithmMaxMillis: 10000,\n"
+ " rowCountEstimate: 4070,\n"
+ " defaultMeasures: [ {\n"
+ " agg: 'count'\n"
+ " } ],\n"
+ " tiles: [ {\n"
+ " dimensions: [ 'the_year', 'warehouse_name'],\n"
+ " measures: [ {\n"
+ " agg: 'sum',\n"
+ " args: 'store_invoice'\n"
+ " }, {\n"
+ " agg: 'sum',\n"
+ " args: 'supply_time'\n"
+ " }, {\n"
+ " agg: 'sum',\n"
+ " args: 'warehouse_cost'\n"
+ " } ]\n"
+ " } ]\n"
+ "}\n";
private static final String AUTO_LATTICE = "{\n"
+ " name: 'star',\n"
+ " sql: [\n"
+ " 'select 1 from \"foodmart\".\"sales_fact_1997\" as \"s\"',\n"
+ " 'join \"foodmart\".\"product\" as \"p\" using (\"product_id\")',\n"
+ " 'join \"foodmart\".\"time_by_day\" as \"t\" using (\"time_id\")',\n"
+ " 'join \"foodmart\".\"product_class\" as \"pc\" on \"p\".\"product_class_id\" = \"pc\".\"product_class_id\"'\n"
+ " ],\n"
+ " auto: false,\n"
+ " algorithm: true,\n"
+ " algorithmMaxMillis: 10000,\n"
+ " rowCountEstimate: 86837,\n"
+ " defaultMeasures: [ {\n"
+ " agg: 'count'\n"
+ " } ],\n"
+ " tiles: [ {\n"
+ " dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
+ " measures: [ {\n"
+ " agg: 'sum',\n"
+ " args: 'unit_sales'\n"
+ " }, {\n"
+ " agg: 'sum',\n"
+ " args: 'store_sales'\n"
+ " }, {\n"
+ " agg: 'count'\n"
+ " } ]\n"
+ " } ]\n"
+ "}\n";
private static CalciteAssert.AssertThat modelWithLattice(String name,
String sql, String... extras) {
final StringBuilder buf = new StringBuilder("{ name: '")
.append(name)
.append("', sql: ")
.append(TestUtil.escapeString(sql));
for (String extra : extras) {
buf.append(", ").append(extra);
}
buf.append("}");
return modelWithLattices(buf.toString());
}
private static CalciteAssert.AssertThat modelWithLattices(
String... lattices) {
final Class<JdbcTest.EmpDeptTableFactory> clazz =
JdbcTest.EmpDeptTableFactory.class;
return CalciteAssert.model(""
+ "{\n"
+ " version: '1.0',\n"
+ " schemas: [\n"
+ FoodmartSchema.FOODMART_SCHEMA
+ ",\n"
+ " {\n"
+ " name: 'adhoc',\n"
+ " tables: [\n"
+ " {\n"
+ " name: 'EMPLOYEES',\n"
+ " type: 'custom',\n"
+ " factory: '"
+ clazz.getName()
+ "',\n"
+ " operand: {'foo': true, 'bar': 345}\n"
+ " }\n"
+ " ],\n"
+ " lattices: "
+ Arrays.toString(lattices)
+ " }\n"
+ " ]\n"
+ "}").withDefaultSchema("adhoc");
}
/** Tests that it's OK for a lattice to have the same name as a table in the
* schema. */
@Test void testLatticeSql() throws Exception {
modelWithLattice("EMPLOYEES", "select * from \"foodmart\".\"days\"")
.doWithConnection(c -> {
final SchemaPlus schema = c.getRootSchema();
final SchemaPlus adhoc = schema.getSubSchema("adhoc");
assertThat(adhoc.getTableNames().contains("EMPLOYEES"), is(true));
final Map.Entry<String, CalciteSchema.LatticeEntry> entry =
adhoc.unwrap(CalciteSchema.class).getLatticeMap().firstEntry();
final Lattice lattice = entry.getValue().getLattice();
final String sql = "SELECT \"days\".\"day\"\n"
+ "FROM \"foodmart\".\"days\" AS \"days\"\n"
+ "GROUP BY \"days\".\"day\"";
assertThat(
lattice.sql(ImmutableBitSet.of(0),
ImmutableList.of()), is(sql));
final String sql2 = "SELECT"
+ " \"days\".\"day\", \"days\".\"week_day\"\n"
+ "FROM \"foodmart\".\"days\" AS \"days\"";
assertThat(
lattice.sql(ImmutableBitSet.of(0, 1), false,
ImmutableList.of()),
is(sql2));
});
}
/** Tests some of the properties of the {@link Lattice} data structure. */
@Test void testLattice() throws Exception {
modelWithLattice("star",
"select 1 from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"product\" as p using (\"product_id\")\n"
+ "join \"foodmart\".\"time_by_day\" as t on t.\"time_id\" = s.\"time_id\"")
.doWithConnection(c -> {
final SchemaPlus schema = c.getRootSchema();
final SchemaPlus adhoc = schema.getSubSchema("adhoc");
assertThat(adhoc.getTableNames().contains("EMPLOYEES"), is(true));
final Map.Entry<String, CalciteSchema.LatticeEntry> entry =
adhoc.unwrap(CalciteSchema.class).getLatticeMap().firstEntry();
final Lattice lattice = entry.getValue().getLattice();
assertThat(lattice.firstColumn("S"), is(10));
assertThat(lattice.firstColumn("P"), is(18));
assertThat(lattice.firstColumn("T"), is(0));
assertThat(lattice.firstColumn("PC"), is(-1));
assertThat(lattice.defaultMeasures.size(), is(1));
assertThat(lattice.rootNode.descendants.size(), is(3));
});
}
/** Tests that it's OK for a lattice to have the same name as a table in the
* schema. */
@Test void testLatticeWithSameNameAsTable() {
modelWithLattice("EMPLOYEES", "select * from \"foodmart\".\"days\"")
.query("select count(*) from EMPLOYEES")
.returnsValue("4");
}
/** Tests that it's an error to have two lattices with the same name in a
* schema. */
@Test void testTwoLatticesWithSameNameFails() {
modelWithLattices(
"{name: 'Lattice1', sql: 'select * from \"foodmart\".\"days\"'}",
"{name: 'Lattice1', sql: 'select * from \"foodmart\".\"time_by_day\"'}")
.connectThrows("Duplicate lattice 'Lattice1'");
}
/** Tests a lattice whose SQL is invalid. */
@Test void testLatticeInvalidSqlFails() {
modelWithLattice("star", "select foo from nonexistent")
.connectThrows("Error instantiating JsonLattice(name=star, ")
.connectThrows("Object 'NONEXISTENT' not found");
}
/** Tests a lattice whose SQL is invalid because it contains a GROUP BY. */
@Test void testLatticeSqlWithGroupByFails() {
modelWithLattice("star",
"select 1 from \"foodmart\".\"sales_fact_1997\" as s group by \"product_id\"")
.connectThrows("Invalid node type LogicalAggregate in lattice query");
}
/** Tests a lattice whose SQL is invalid because it contains a ORDER BY. */
@Test void testLatticeSqlWithOrderByFails() {
modelWithLattice("star",
"select 1 from \"foodmart\".\"sales_fact_1997\" as s order by \"product_id\"")
.connectThrows("Invalid node type LogicalSort in lattice query");
}
/** Tests a lattice whose SQL is invalid because it contains a UNION ALL. */
@Test void testLatticeSqlWithUnionFails() {
modelWithLattice("star",
"select 1 from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "union all\n"
+ "select 1 from \"foodmart\".\"sales_fact_1997\" as s")
.connectThrows("Invalid node type LogicalUnion in lattice query");
}
/** Tests a lattice with valid join SQL. */
@Test void testLatticeSqlWithJoin() {
foodmartModel()
.query("values 1")
.returnsValue("1");
}
/** Tests a lattice with invalid SQL (for a lattice). */
@Test void testLatticeInvalidSql() {
modelWithLattice("star",
"select 1 from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"product\" as p using (\"product_id\")\n"
+ "join \"foodmart\".\"time_by_day\" as t on s.\"product_id\" = 100")
.connectThrows("only equi-join of columns allowed: 100");
}
/** Left join is invalid in a lattice. */
@Test void testLatticeInvalidSql2() {
modelWithLattice("star",
"select 1 from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"product\" as p using (\"product_id\")\n"
+ "left join \"foodmart\".\"time_by_day\" as t on s.\"product_id\" = p.\"product_id\"")
.connectThrows("only non nulls-generating join allowed, but got LEFT");
}
/** Each lattice table must have a parent. */
@Test void testLatticeInvalidSql3() {
modelWithLattice("star",
"select 1 from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"product\" as p using (\"product_id\")\n"
+ "join \"foodmart\".\"time_by_day\" as t on s.\"product_id\" = p.\"product_id\"")
.connectThrows("child node must have precisely one parent");
}
/** When a lattice is registered, there is a table with the same name.
* It can be used for explain, but not for queries. */
@Test void testLatticeStarTable() {
final AtomicInteger counter = new AtomicInteger();
try {
foodmartModel()
.query("select count(*) from \"adhoc\".\"star\"")
.convertMatches(
CalciteAssert.checkRel(""
+ "LogicalAggregate(group=[{}], EXPR$0=[COUNT()])\n"
+ " StarTableScan(table=[[adhoc, star]])\n",
counter));
} catch (Throwable e) {
assertThat(Throwables.getStackTraceAsString(e),
containsString("CannotPlanException"));
}
assertThat(counter.get(), equalTo(1));
}
/** Tests that a 2-way join query can be mapped 4-way join lattice. */
@Test void testLatticeRecognizeJoin() {
final AtomicInteger counter = new AtomicInteger();
foodmartModel()
.query("select s.\"unit_sales\", p.\"brand_name\"\n"
+ "from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"product\" as p using (\"product_id\")\n")
.enableMaterializations(true)
.substitutionMatches(
CalciteAssert.checkRel(
"LogicalProject(unit_sales=[$7], brand_name=[$10])\n"
+ " LogicalProject(product_id=[$0], time_id=[$1], customer_id=[$2], promotion_id=[$3], store_id=[$4], store_sales=[$5], store_cost=[$6], unit_sales=[$7], product_class_id=[$8], product_id0=[$9], brand_name=[$10], product_name=[$11], SKU=[$12], SRP=[$13], gross_weight=[$14], net_weight=[$15], recyclable_package=[$16], low_fat=[$17], units_per_case=[$18], cases_per_pallet=[$19], shelf_width=[$20], shelf_height=[$21], shelf_depth=[$22])\n"
+ " StarTableScan(table=[[adhoc, star]])\n",
counter));
assertThat(counter.intValue(), equalTo(1));
}
/** Tests an aggregate on a 2-way join query can use an aggregate table. */
@Test void testLatticeRecognizeGroupJoin() {
final AtomicInteger counter = new AtomicInteger();
CalciteAssert.AssertQuery that = foodmartModel()
.query("select distinct p.\"brand_name\", s.\"customer_id\"\n"
+ "from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"product\" as p using (\"product_id\")\n")
.enableMaterializations(true)
.substitutionMatches(relNode -> {
counter.incrementAndGet();
String s = RelOptUtil.toString(relNode);
assertThat(s,
anyOf(
containsStringLinux(
"LogicalProject(brand_name=[$1], customer_id=[$0])\n"
+ " LogicalAggregate(group=[{2, 10}])\n"
+ " StarTableScan(table=[[adhoc, star]])\n"),
containsStringLinux(
"LogicalAggregate(group=[{2, 10}])\n"
+ " StarTableScan(table=[[adhoc, star]])\n")));
return null;
});
assertThat(counter.intValue(), equalTo(2));
that.explainContains(""
+ "EnumerableCalc(expr#0..1=[{inputs}], brand_name=[$t1], customer_id=[$t0])\n"
+ " EnumerableTableScan(table=[[adhoc, m{2, 10}]])")
.returnsCount(69203);
// Run the same query again and see whether it uses the same
// materialization.
that.withHook(Hook.CREATE_MATERIALIZATION,
materializationName -> counter.incrementAndGet())
.returnsCount(69203);
// Ideally the counter would stay at 2. It increments to 3 because
// CalciteAssert.AssertQuery creates a new schema for every request,
// and therefore cannot re-use lattices or materializations from the
// previous request.
assertThat(counter.intValue(), equalTo(3));
}
/** Tests a model with pre-defined tiles. */
@Test void testLatticeWithPreDefinedTiles() {
foodmartModel(" auto: false,\n"
+ " defaultMeasures: [ {\n"
+ " agg: 'count'\n"
+ " } ],\n"
+ " tiles: [ {\n"
+ " dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
+ " measures: [ ]\n"
+ " } ]\n")
.query("select distinct t.\"the_year\", t.\"quarter\"\n"
+ "from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
.enableMaterializations(true)
.explainContains("EnumerableTableScan(table=[[adhoc, m{32, 36}")
.returnsCount(4);
}
/** A query that uses a pre-defined aggregate table, at the same
* granularity but fewer calls to aggregate functions. */
@Test void testLatticeWithPreDefinedTilesFewerMeasures() {
foodmartModelWithOneTile()
.query("select t.\"the_year\", t.\"quarter\", count(*) as c\n"
+ "from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n"
+ "group by t.\"the_year\", t.\"quarter\"")
.enableMaterializations(true)
.explainContains(""
+ "EnumerableCalc(expr#0..4=[{inputs}], proj#0..2=[{exprs}])\n"
+ " EnumerableTableScan(table=[[adhoc, m{32, 36}")
.returnsUnordered("the_year=1997; quarter=Q1; C=21588",
"the_year=1997; quarter=Q2; C=20368",
"the_year=1997; quarter=Q3; C=21453",
"the_year=1997; quarter=Q4; C=23428")
.sameResultWithMaterializationsDisabled();
}
/** Tests a query that uses a pre-defined aggregate table at a lower
* granularity. Includes a measure computed from a grouping column, a measure
* based on COUNT rolled up using SUM, and an expression on a measure. */
@Test void testLatticeWithPreDefinedTilesRollUp() {
foodmartModelWithOneTile()
.query("select t.\"the_year\",\n"
+ " count(*) as c,\n"
+ " min(\"quarter\") as q,\n"
+ " sum(\"unit_sales\") * 10 as us\n"
+ "from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n"
+ "group by t.\"the_year\"")
.enableMaterializations(true)
.explainContains(""
+ "EnumerableCalc(expr#0..3=[{inputs}], expr#4=[10], expr#5=[*($t3, $t4)], proj#0..2=[{exprs}], US=[$t5])\n"
+ " EnumerableAggregate(group=[{0}], C=[$SUM0($2)], Q=[MIN($1)], agg#2=[$SUM0($4)])\n"
+ " EnumerableTableScan(table=[[adhoc, m{32, 36}")
.enable(CalciteAssert.DB != CalciteAssert.DatabaseInstance.ORACLE)
.returnsUnordered("the_year=1997; C=86837; Q=Q1; US=2667730.0000")
.sameResultWithMaterializationsDisabled();
}
/** Tests a model that uses an algorithm to generate an initial set of
* tiles.
*
* <p>Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-428">[CALCITE-428]
* Use optimization algorithm to suggest which tiles of a lattice to
* materialize</a>. */
@Test void testTileAlgorithm() {
final String explain = "EnumerableAggregate(group=[{2, 3}])\n"
+ " EnumerableTableScan(table=[[adhoc, m{16, 17, 32, 36, 37}]])";
checkTileAlgorithm(
FoodMartLatticeStatisticProvider.class.getCanonicalName() + "#FACTORY",
explain);
}
/** As {@link #testTileAlgorithm()}, but uses the
* {@link Lattices#CACHED_SQL} statistics provider. */
@Test void testTileAlgorithm2() {
// Different explain than above, but note that it still selects columns
// (27, 31).
final String explain = "EnumerableAggregate(group=[{4, 5}])\n"
+ " EnumerableTableScan(table=[[adhoc, m{16, 17, 27, 31, 32, 36, 37}]";
checkTileAlgorithm(Lattices.class.getCanonicalName() + "#CACHED_SQL",
explain);
}
/** As {@link #testTileAlgorithm()}, but uses the
* {@link Lattices#PROFILER} statistics provider. */
@Test void testTileAlgorithm3() {
assumeTrue(TestUtil.getJavaMajorVersion() >= 8,
"Yahoo sketches requires JDK 8 or higher");
final String explain = "EnumerableAggregate(group=[{4, 5}])\n"
+ " EnumerableTableScan(table=[[adhoc, m{16, 17, 27, 31, 32, 36, 37}]";
checkTileAlgorithm(Lattices.class.getCanonicalName() + "#PROFILER",
explain);
}
private void checkTileAlgorithm(String statisticProvider,
String expectedExplain) {
final RelOptRule[] rules = {
MaterializedViewRules.PROJECT_FILTER,
MaterializedViewRules.FILTER,
MaterializedViewRules.PROJECT_JOIN,
MaterializedViewRules.JOIN,
MaterializedViewRules.PROJECT_AGGREGATE,
MaterializedViewRules.AGGREGATE
};
MaterializationService.setThreadLocal();
MaterializationService.instance().clear();
foodmartLatticeModel(statisticProvider)
.query("select distinct t.\"the_year\", t.\"quarter\"\n"
+ "from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
.enableMaterializations(true)
// Disable materialization rules from this test. For some reason, there is
// a weird interaction between these rules and the lattice rewriting that
// produces non-deterministic rewriting (even when only lattices are present).
// For more context, see
// <a href="https://issues.apache.org/jira/browse/CALCITE-2953">[CALCITE-2953]</a>.
.withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner ->
Arrays.asList(rules).forEach(planner::removeRule))
// disable for MySQL; times out running star-join query
// disable for H2; it thinks our generated SQL has invalid syntax
.enable(CalciteAssert.DB != CalciteAssert.DatabaseInstance.MYSQL
&& CalciteAssert.DB != CalciteAssert.DatabaseInstance.H2)
.explainContains(expectedExplain)
.returnsUnordered("the_year=1997; quarter=Q1",
"the_year=1997; quarter=Q2",
"the_year=1997; quarter=Q3",
"the_year=1997; quarter=Q4");
}
private static CalciteAssert.AssertThat foodmartLatticeModel(
String statisticProvider) {
return foodmartModel(" auto: false,\n"
+ " algorithm: true,\n"
+ " algorithmMaxMillis: -1,\n"
+ " rowCountEstimate: 87000,\n"
+ " defaultMeasures: [ {\n"
+ " agg: 'sum',\n"
+ " args: 'unit_sales'\n"
+ " }, {\n"
+ " agg: 'sum',\n"
+ " args: 'store_sales'\n"
+ " }, {\n"
+ " agg: 'count'\n"
+ " } ],\n"
+ " statisticProvider: '"
+ statisticProvider
+ "',\n"
+ " tiles: [ {\n"
+ " dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
+ " measures: [ ]\n"
+ " } ]\n");
}
/** Tests a query that is created within {@link #testTileAlgorithm()}. */
@Test void testJG() {
final String sql = ""
+ "SELECT \"s\".\"unit_sales\", \"p\".\"recyclable_package\", \"t\".\"the_day\", \"t\".\"the_year\", \"t\".\"quarter\", \"pc\".\"product_family\", COUNT(*) AS \"m0\", SUM(\"s\".\"store_sales\") AS \"m1\", SUM(\"s\".\"unit_sales\") AS \"m2\"\n"
+ "FROM \"foodmart\".\"sales_fact_1997\" AS \"s\"\n"
+ "JOIN \"foodmart\".\"product\" AS \"p\" ON \"s\".\"product_id\" = \"p\".\"product_id\"\n"
+ "JOIN \"foodmart\".\"time_by_day\" AS \"t\" ON \"s\".\"time_id\" = \"t\".\"time_id\"\n"
+ "JOIN \"foodmart\".\"product_class\" AS \"pc\" ON \"p\".\"product_class_id\" = \"pc\".\"product_class_id\"\n"
+ "GROUP BY \"s\".\"unit_sales\", \"p\".\"recyclable_package\", \"t\".\"the_day\", \"t\".\"the_year\", \"t\".\"quarter\", \"pc\".\"product_family\"";
final String explain = "JdbcToEnumerableConverter\n"
+ " JdbcAggregate(group=[{3, 6, 8, 9, 10, 12}], m0=[COUNT()], m1=[$SUM0($2)], m2=[$SUM0($3)])\n"
+ " JdbcJoin(condition=[=($4, $11)], joinType=[inner])\n"
+ " JdbcJoin(condition=[=($1, $7)], joinType=[inner])\n"
+ " JdbcJoin(condition=[=($0, $5)], joinType=[inner])\n"
+ " JdbcProject(product_id=[$0], time_id=[$1], store_sales=[$5], unit_sales=[$7])\n"
+ " JdbcTableScan(table=[[foodmart, sales_fact_1997]])\n"
+ " JdbcProject(product_class_id=[$0], product_id=[$1], recyclable_package=[$8])\n"
+ " JdbcTableScan(table=[[foodmart, product]])\n"
+ " JdbcProject(time_id=[$0], the_day=[$2], the_year=[$4], quarter=[$8])\n"
+ " JdbcTableScan(table=[[foodmart, time_by_day]])\n"
+ " JdbcProject(product_class_id=[$0], product_family=[$4])\n"
+ " JdbcTableScan(table=[[foodmart, product_class]])";
CalciteAssert.that().with(CalciteAssert.Config.JDBC_FOODMART)
.query(sql)
.explainContains(explain);
}
/** Tests a query that uses no columns from the fact table. */
@Test void testGroupByEmpty() {
foodmartModel()
.query("select count(*) as c from \"foodmart\".\"sales_fact_1997\"")
.enableMaterializations(true)
.returnsUnordered("C=86837");
}
/** Calls {@link #testDistinctCount()} followed by
* {@link #testGroupByEmpty()}. */
@Test void testGroupByEmptyWithPrelude() {
testDistinctCount();
testGroupByEmpty();
}
/** Tests a query that uses no dimension columns and one measure column. */
@Test void testGroupByEmpty2() {
foodmartModel()
.query("select sum(\"unit_sales\") as s\n"
+ "from \"foodmart\".\"sales_fact_1997\"")
.enableMaterializations(true)
.enable(CalciteAssert.DB != CalciteAssert.DatabaseInstance.ORACLE)
.returnsUnordered("S=266773.0000");
}
/** Tests that two queries of the same dimensionality that use different
* measures can use the same materialization. */
@Test void testGroupByEmpty3() {
final List<String> mats = new ArrayList<>();
final CalciteAssert.AssertThat that = foodmartModel().pooled();
that.query("select sum(\"unit_sales\") as s, count(*) as c\n"
+ "from \"foodmart\".\"sales_fact_1997\"")
.withHook(Hook.CREATE_MATERIALIZATION, (Consumer<String>) mats::add)
.enableMaterializations(true)
.explainContains("EnumerableTableScan(table=[[adhoc, m{}]])")
.enable(CalciteAssert.DB != CalciteAssert.DatabaseInstance.ORACLE)
.returnsUnordered("S=266773.0000; C=86837");
assertThat(mats.toString(), mats.size(), equalTo(2));
// A similar query can use the same materialization.
that.query("select sum(\"unit_sales\") as s\n"
+ "from \"foodmart\".\"sales_fact_1997\"")
.withHook(Hook.CREATE_MATERIALIZATION, (Consumer<String>) mats::add)
.enableMaterializations(true)
.enable(CalciteAssert.DB != CalciteAssert.DatabaseInstance.ORACLE)
.returnsUnordered("S=266773.0000");
assertThat(mats.toString(), mats.size(), equalTo(2));
}
/** Rolling up SUM. */
@Test void testSum() {
foodmartModelWithOneTile()
.query("select sum(\"unit_sales\") as c\n"
+ "from \"foodmart\".\"sales_fact_1997\"\n"
+ "group by \"product_id\"\n"
+ "order by 1 desc limit 1")
.enableMaterializations(true)
.enable(CalciteAssert.DB != CalciteAssert.DatabaseInstance.ORACLE)
.returnsUnordered("C=267.0000");
}
/** Tests a distinct-count query.
*
* <p>We can't just roll up count(distinct ...) as we do count(...), but we
* can still use the aggregate table if we're smart. */
@Test void testDistinctCount() {
foodmartModelWithOneTile()
.query("select count(distinct \"quarter\") as c\n"
+ "from \"foodmart\".\"sales_fact_1997\"\n"
+ "join \"foodmart\".\"time_by_day\" using (\"time_id\")\n"
+ "group by \"the_year\"")
.enableMaterializations(true)
.explainContains("EnumerableCalc(expr#0..1=[{inputs}], C=[$t1])\n"
+ " EnumerableAggregate(group=[{0}], C=[COUNT($1)])\n"
+ " EnumerableTableScan(table=[[adhoc, m{32, 36}]])")
.returnsUnordered("C=4");
}
@Test void testDistinctCount2() {
foodmartModelWithOneTile()
.query("select count(distinct \"the_year\") as c\n"
+ "from \"foodmart\".\"sales_fact_1997\"\n"
+ "join \"foodmart\".\"time_by_day\" using (\"time_id\")\n"
+ "group by \"the_year\"")
.enableMaterializations(true)
.explainContains("EnumerableCalc(expr#0=[{inputs}], expr#1=[IS NOT NULL($t0)], "
+ "expr#2=[1:BIGINT], expr#3=[0:BIGINT], expr#4=[CASE($t1, $t2, $t3)], C=[$t4])\n"
+ " EnumerableAggregate(group=[{0}])\n"
+ " EnumerableTableScan(table=[[adhoc, m{32, 36}]])")
.returnsUnordered("C=1");
}
/** Runs all queries against the Foodmart schema, using a lattice.
*
* <p>Disabled for normal runs, because it is slow. */
@Disabled
@Test void testAllFoodmartQueries() {
// Test ids that had bugs in them until recently. Useful for a sanity check.
final List<Integer> fixed = ImmutableList.of(13, 24, 28, 30, 61, 76, 79, 81,
85, 98, 101, 107, 128, 129, 130, 131);
// Test ids that still have bugs
final List<Integer> bad = ImmutableList.of(382, 423);
for (int i = 1; i < 1000; i++) {
System.out.println("i=" + i);
try {
if (bad.contains(i)) {
continue;
}
check(i);
} catch (Throwable e) {
throw new RuntimeException("error in " + i, e);
}
}
}
private void check(int n) throws IOException {
final FoodMartQuerySet set = FoodMartQuerySet.instance();
final FoodMartQuerySet.FoodmartQuery query = set.queries.get(n);
if (query == null) {
return;
}
foodmartModelWithOneTile()
.withDefaultSchema("foodmart")
.query(query.sql)
.sameResultWithMaterializationsDisabled();
}
/** A tile with no measures should inherit default measure list from the
* lattice. */
@Test void testTileWithNoMeasures() {
foodmartModel(" auto: false,\n"
+ " defaultMeasures: [ {\n"
+ " agg: 'count'\n"
+ " } ],\n"
+ " tiles: [ {\n"
+ " dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
+ " measures: [ ]\n"
+ " } ]\n")
.query("select count(t.\"the_year\", t.\"quarter\")\n"
+ "from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
.enableMaterializations(true)
.explainContains("EnumerableAggregate(group=[{}], EXPR$0=[COUNT($0, $1)])\n"
+ " EnumerableTableScan(table=[[adhoc, m{32, 36}")
.returnsCount(1);
}
/** A lattice with no default measure list should get "count(*)" is its
* default measure. */
@Test void testLatticeWithNoMeasures() {
foodmartModel(" auto: false,\n"
+ " tiles: [ {\n"
+ " dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
+ " measures: [ ]\n"
+ " } ]\n")
.query("select count(*)\n"
+ "from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
.enableMaterializations(true)
.explainContains("EnumerableAggregate(group=[{}], EXPR$0=[COUNT()])\n"
+ " EnumerableTableScan(table=[[adhoc, m{32, 36}")
.returnsCount(1);
}
@Test void testDimensionIsInvalidColumn() {
foodmartModel(" auto: false,\n"
+ " tiles: [ {\n"
+ " dimensions: [ 'invalid_column'],\n"
+ " measures: [ ]\n"
+ " } ]\n")
.connectThrows("Unknown lattice column 'invalid_column'");
}
@Test void testMeasureArgIsInvalidColumn() {
foodmartModel(" auto: false,\n"
+ " defaultMeasures: [ {\n"
+ " agg: 'sum',\n"
+ " args: 'invalid_column'\n"
+ " } ],\n"
+ " tiles: [ {\n"
+ " dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
+ " measures: [ ]\n"
+ " } ]\n")
.connectThrows("Unknown lattice column 'invalid_column'");
}
/** It is an error for "time_id" to be a measure arg, because is not a
* unique alias. Both "s" and "t" have "time_id". */
@Test void testMeasureArgIsNotUniqueAlias() {
foodmartModel(" auto: false,\n"
+ " defaultMeasures: [ {\n"
+ " agg: 'count',\n"
+ " args: 'time_id'\n"
+ " } ],\n"
+ " tiles: [ {\n"
+ " dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
+ " measures: [ ]\n"
+ " } ]\n")
.connectThrows("Lattice column alias 'time_id' is not unique");
}
@Test void testMeasureAggIsInvalid() {
foodmartModel(" auto: false,\n"
+ " defaultMeasures: [ {\n"
+ " agg: 'invalid_count',\n"
+ " args: 'customer_id'\n"
+ " } ],\n"
+ " tiles: [ {\n"
+ " dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
+ " measures: [ ]\n"
+ " } ]\n")
.connectThrows("Unknown lattice aggregate function invalid_count");
}
@Test void testTwoLattices() {
final AtomicInteger counter = new AtomicInteger();
// disable for MySQL; times out running star-join query
// disable for H2; it thinks our generated SQL has invalid syntax
final boolean enabled =
CalciteAssert.DB != CalciteAssert.DatabaseInstance.MYSQL
&& CalciteAssert.DB != CalciteAssert.DatabaseInstance.H2;
modelWithLattices(SALES_LATTICE, INVENTORY_LATTICE)
.query("select s.\"unit_sales\", p.\"brand_name\"\n"
+ "from \"foodmart\".\"sales_fact_1997\" as s\n"
+ "join \"foodmart\".\"product\" as p using (\"product_id\")\n")
.enableMaterializations(true)
.enable(enabled)
.substitutionMatches(
CalciteAssert.checkRel(
"LogicalProject(unit_sales=[$7], brand_name=[$10])\n"
+ " LogicalProject(product_id=[$0], time_id=[$1], customer_id=[$2], promotion_id=[$3], store_id=[$4], store_sales=[$5], store_cost=[$6], unit_sales=[$7], product_class_id=[$8], product_id0=[$9], brand_name=[$10], product_name=[$11], SKU=[$12], SRP=[$13], gross_weight=[$14], net_weight=[$15], recyclable_package=[$16], low_fat=[$17], units_per_case=[$18], cases_per_pallet=[$19], shelf_width=[$20], shelf_height=[$21], shelf_depth=[$22])\n"
+ " StarTableScan(table=[[adhoc, star]])\n",
counter));
if (enabled) {
assertThat(counter.intValue(), is(1));
}
}
/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-787">[CALCITE-787]
* Star table wrongly assigned to materialized view</a>. */
@Test void testOneLatticeOneMV() {
final AtomicInteger counter = new AtomicInteger();
final Class<JdbcTest.EmpDeptTableFactory> clazz =
JdbcTest.EmpDeptTableFactory.class;
final String mv = " materializations: [\n"
+ " {\n"
+ " table: \"m0\",\n"
+ " view: \"m0v\",\n"
+ " sql: \"select * from \\\"foodmart\\\".\\\"sales_fact_1997\\\" "
+ "where \\\"product_id\\\" = 10\" "
+ " }\n"
+ " ]\n";
final String model = ""
+ "{\n"
+ " version: '1.0',\n"
+ " schemas: [\n"
+ FoodmartSchema.FOODMART_SCHEMA
+ ",\n"
+ " {\n"
+ " name: 'adhoc',\n"
+ " tables: [\n"
+ " {\n"
+ " name: 'EMPLOYEES',\n"
+ " type: 'custom',\n"
+ " factory: '"
+ clazz.getName()
+ "',\n"
+ " operand: {'foo': true, 'bar': 345}\n"
+ " }\n"
+ " ],\n"
+ " lattices: " + "[" + INVENTORY_LATTICE
+ " ]\n"
+ " },\n"
+ " {\n"
+ " name: 'mat',\n"
+ mv
+ " }\n"
+ " ]\n"
+ "}";
CalciteAssert.model(model)
.withDefaultSchema("foodmart")
.query("select * from \"foodmart\".\"sales_fact_1997\" where \"product_id\" = 10")
.enableMaterializations(true)
.substitutionMatches(
CalciteAssert.checkRel(
"LogicalTableScan(table=[[mat, m0]])\n",
counter));
assertThat(counter.intValue(), equalTo(1));
}
/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-760">[CALCITE-760]
* Aggregate recommender blows up if row count estimate is too high</a>. */
@Disabled
@Test void testLatticeWithBadRowCountEstimate() {
final String lattice =
INVENTORY_LATTICE.replace("rowCountEstimate: 4070,",
"rowCountEstimate: 4074070,");
assertNotEquals(lattice, INVENTORY_LATTICE);
modelWithLattices(lattice)
.query("values 1\n")
.returns("EXPR$0=1\n");
}
@Test void testSuggester() {
final Class<JdbcTest.EmpDeptTableFactory> clazz =
JdbcTest.EmpDeptTableFactory.class;
final String model = ""
+ "{\n"
+ " version: '1.0',\n"
+ " schemas: [\n"
+ FoodmartSchema.FOODMART_SCHEMA
+ ",\n"
+ " {\n"
+ " name: 'adhoc',\n"
+ " tables: [\n"
+ " {\n"
+ " name: 'EMPLOYEES',\n"
+ " type: 'custom',\n"
+ " factory: '" + clazz.getName() + "',\n"
+ " operand: {'foo': true, 'bar': 345}\n"
+ " }\n"
+ " ],\n"
+ " \"autoLattice\": true"
+ " }\n"
+ " ]\n"
+ "}";
final String sql = "select count(*)\n"
+ "from \"sales_fact_1997\"\n"
+ "join \"time_by_day\" using (\"time_id\")\n";
final String explain = "PLAN=JdbcToEnumerableConverter\n"
+ " JdbcAggregate(group=[{}], EXPR$0=[COUNT()])\n"
+ " JdbcJoin(condition=[=($0, $1)], joinType=[inner])\n"
+ " JdbcProject(time_id=[$1])\n"
+ " JdbcTableScan(table=[[foodmart, sales_fact_1997]])\n"
+ " JdbcProject(time_id=[$0])\n"
+ " JdbcTableScan(table=[[foodmart, time_by_day]])\n";
CalciteAssert.model(model)
.withDefaultSchema("foodmart")
.query(sql)
.returns("EXPR$0=86837\n")
.explainContains(explain);
}
private static CalciteAssert.AssertThat foodmartModel(String... extras) {
final String sql = "select 1\n"
+ "from \"foodmart\".\"sales_fact_1997\" as \"s\"\n"
+ "join \"foodmart\".\"product\" as \"p\" using (\"product_id\")\n"
+ "join \"foodmart\".\"time_by_day\" as \"t\" using (\"time_id\")\n"
+ "join \"foodmart\".\"product_class\" as \"pc\"\n"
+ " on \"p\".\"product_class_id\" = \"pc\".\"product_class_id\"";
return modelWithLattice("star", sql, extras);
}
private CalciteAssert.AssertThat foodmartModelWithOneTile() {
return foodmartModel(" auto: false,\n"
+ " defaultMeasures: [ {\n"
+ " agg: 'count'\n"
+ " } ],\n"
+ " tiles: [ {\n"
+ " dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
+ " measures: [ {\n"
+ " agg: 'sum',\n"
+ " args: 'unit_sales'\n"
+ " }, {\n"
+ " agg: 'sum',\n"
+ " args: 'store_sales'\n"
+ " }, {\n"
+ " agg: 'count'\n"
+ " } ]\n"
+ " } ]\n");
}
// Just for debugging.
private static void runJdbc() throws SQLException {
final Connection connection = DriverManager.getConnection(
"jdbc:calcite:model=core/src/test/resources/mysql-foodmart-lattice-model.json");
final ResultSet resultSet = connection.createStatement()
.executeQuery("select * from \"adhoc\".\"m{32, 36}\"");
System.out.println(CalciteAssert.toString(resultSet));
connection.close();
}
/** Unit test for {@link Lattice#getRowCount(double, List)}. */
@Test void testColumnCount() {
assertThat(Lattice.getRowCount(10, 2, 3), within(5.03D, 0.01D));
assertThat(Lattice.getRowCount(10, 9, 8), within(9.4D, 0.01D));
assertThat(Lattice.getRowCount(100, 9, 8), within(54.2D, 0.1D));
assertThat(Lattice.getRowCount(1000, 9, 8), within(72D, 0.01D));
assertThat(Lattice.getRowCount(1000, 1, 1), is(1D));
assertThat(Lattice.getRowCount(1, 3, 5), within(1D, 0.01D));
assertThat(Lattice.getRowCount(1, 3, 5, 13, 4831), within(1D, 0.01D));
}
}