| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to you under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.calcite.test; |
| |
| import org.apache.calcite.adapter.druid.DruidQuery; |
| import org.apache.calcite.config.CalciteConnectionConfig; |
| import org.apache.calcite.config.CalciteConnectionProperty; |
| import org.apache.calcite.rel.RelNode; |
| import org.apache.calcite.rel.type.RelDataType; |
| import org.apache.calcite.rex.RexNode; |
| import org.apache.calcite.sql.fun.SqlStdOperatorTable; |
| import org.apache.calcite.sql.type.SqlTypeName; |
| import org.apache.calcite.tools.RelBuilder; |
| import org.apache.calcite.util.Util; |
| |
| import com.google.common.base.Function; |
| import com.google.common.collect.ArrayListMultimap; |
| import com.google.common.collect.ImmutableList; |
| import com.google.common.collect.ImmutableMap; |
| import com.google.common.collect.Multimap; |
| |
| import org.junit.Ignore; |
| import org.junit.Test; |
| |
| import java.net.URL; |
| import java.sql.Connection; |
| import java.sql.DatabaseMetaData; |
| import java.sql.ResultSet; |
| import java.sql.SQLException; |
| import java.util.List; |
| |
| import static org.hamcrest.CoreMatchers.containsString; |
| import static org.hamcrest.CoreMatchers.is; |
| import static org.junit.Assert.assertFalse; |
| import static org.junit.Assert.assertThat; |
| import static org.junit.Assert.assertTrue; |
| |
| /** |
| * Tests for the {@code org.apache.calcite.adapter.druid} package. |
| * |
| * <p>Before calling this test, you need to populate Druid, as follows: |
| * |
| * <blockquote><code> |
| * git clone https://github.com/vlsi/calcite-test-dataset<br> |
| * cd calcite-test-dataset<br> |
| * mvn install |
| * </code></blockquote> |
| * |
| * <p>This will create a virtual machine with Druid and test data set. |
| * |
| * <p>Features not yet implemented: |
| * <ul> |
| * <li>push LIMIT into "select" query</li> |
| * <li>push SORT and/or LIMIT into "groupBy" query</li> |
| * <li>push HAVING into "groupBy" query</li> |
| * </ul> |
| */ |
| public class DruidAdapterIT { |
  /** URL of the "druid-foodmart" model. */
  public static final URL FOODMART =
      DruidAdapterIT.class.getResource("/druid-foodmart-model.json");

  /** URL of the "druid-wiki" model
   * and the "wikiticker" data set. */
  public static final URL WIKI =
      DruidAdapterIT.class.getResource("/druid-wiki-model.json");

  /** URL of the "druid-wiki-no-columns" model
   * and the "wikiticker" data set. */
  public static final URL WIKI_AUTO =
      DruidAdapterIT.class.getResource("/druid-wiki-no-columns-model.json");

  /** URL of the "druid-wiki-no-tables" model
   * and the "wikiticker" data set. */
  public static final URL WIKI_AUTO2 =
      DruidAdapterIT.class.getResource("/druid-wiki-no-tables-model.json");

  /** Whether to run Druid tests. Enabled by default, however test is only
   * included if "it" profile is activated ({@code -Pit}). To disable,
   * specify {@code -Dcalcite.test.druid=false} on the Java command line. */
  public static final boolean ENABLED =
      Util.getBooleanProperty("calcite.test.druid", true);

  /** Full SQL type name that string-valued Druid dimensions are reported as
   * in JDBC metadata; compared against in {@link #testMetadataColumns()}. */
  private static final String VARCHAR_TYPE =
      "VARCHAR CHARACTER SET \"ISO-8859-1\" COLLATE \"ISO-8859-1$en_US$primary\"";
| |
  /** Whether to run this test. Protected so that subclasses can override
   * the decision; the default consults {@link #ENABLED}. */
  protected boolean enabled() {
    return ENABLED;
  }
| |
| /** Returns a function that checks that a particular Druid query is |
| * generated to implement a query. */ |
| private static Function<List, Void> druidChecker(final String... lines) { |
| return new Function<List, Void>() { |
| public Void apply(List list) { |
| assertThat(list.size(), is(1)); |
| DruidQuery.QuerySpec querySpec = (DruidQuery.QuerySpec) list.get(0); |
| for (String line : lines) { |
| final String s = line.replace('\'', '"'); |
| assertThat(querySpec.getQueryString(null, -1), containsString(s)); |
| } |
| return null; |
| } |
| }; |
| } |
| |
| /** Creates a query against a data set given by a map. */ |
| private CalciteAssert.AssertQuery sql(String sql, URL url) { |
| return CalciteAssert.that() |
| .enable(enabled()) |
| .with(ImmutableMap.of("model", url.getPath())) |
| .query(sql); |
| } |
| |
  /** Creates a query against the {@link #FOODMART} data set. */
  private CalciteAssert.AssertQuery sql(String sql) {
    return sql(sql, FOODMART);
  }
| |
  /** Tests a query against the {@link #WIKI} data set.
   *
   * <p>Most of the others in this suite are against {@link #FOODMART},
   * but our examples in "druid-adapter.md" use wikiticker. */
  @Test public void testSelectDistinctWiki() {
    // Field ordinals ($13, {5}) follow the column order declared in the
    // "druid-wiki" model; compare testSelectDistinctWikiNoColumns.
    final String explain = "PLAN="
        + "EnumerableInterpreter\n"
        + " DruidQuery(table=[[wiki, wiki]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], "
        + "filter=[=($13, 'Jeremy Corbyn')], groups=[{5}], aggs=[[]])\n";
    checkSelectDistinctWiki(WIKI, "wiki")
        .explainContains(explain);
  }
| |
  /** As {@link #testSelectDistinctWiki()}, but against the model that
   * declares no columns, so the field ordinals differ ($17, {7}). */
  @Test public void testSelectDistinctWikiNoColumns() {
    final String explain = "PLAN="
        + "EnumerableInterpreter\n"
        + " DruidQuery(table=[[wiki, wiki]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], "
        + "filter=[=($17, 'Jeremy Corbyn')], groups=[{7}], aggs=[[]])\n";
    checkSelectDistinctWiki(WIKI_AUTO, "wiki")
        .explainContains(explain);
  }
| |
  /** As {@link #testSelectDistinctWiki()}, but against the model that
   * declares no tables at all; table names and intervals are discovered. */
  @Test public void testSelectDistinctWikiNoTables() {
    // Compared to testSelectDistinctWiki, table name is different (because it
    // is the raw dataSource name from Druid) and the field offsets are
    // different. This is expected.
    // Interval is different, as default is taken.
    final String sql = "select distinct \"countryName\"\n"
        + "from \"wikiticker\"\n"
        + "where \"page\" = 'Jeremy Corbyn'";
    final String explain = "PLAN="
        + "EnumerableInterpreter\n"
        + " DruidQuery(table=[[wiki, wikiticker]], "
        + "intervals=[[1900-01-01T00:00:00.000/3000-01-01T00:00:00.000]], "
        + "filter=[=($17, 'Jeremy Corbyn')], groups=[{7}], aggs=[[]])\n";
    // The "dummy_agg" aggregation is a placeholder; the query only needs the
    // grouped dimension, but Druid's groupBy requires at least one aggregator.
    final String druidQuery = "{'queryType':'groupBy',"
        + "'dataSource':'wikiticker','granularity':'all',"
        + "'dimensions':[{'type':'default','dimension':'countryName'}],'limitSpec':{'type':'default'},"
        + "'filter':{'type':'selector','dimension':'page','value':'Jeremy Corbyn'},"
        + "'aggregations':[{'type':'longSum','name':'dummy_agg','fieldName':'dummy_agg'}],"
        + "'intervals':['1900-01-01T00:00:00.000/3000-01-01T00:00:00.000']}";
    sql(sql, WIKI_AUTO2)
        .returnsUnordered("countryName=United Kingdom",
            "countryName=null")
        .explainContains(explain)
        .queryContains(druidChecker(druidQuery));
    // Because no tables are declared, foodmart is automatically present.
    sql("select count(*) as c from \"foodmart\"", WIKI_AUTO2)
        .returnsUnordered("C=86829");
  }
| |
  /** Grouping on FLOOR of the default time column maps to a Druid
   * "timeseries" query with day granularity. */
  @Test public void testSelectTimestampColumnNoTables1() {
    // Since columns are not explicitly declared, we use the default time
    // column in the query.
    final String sql = "select sum(\"added\")\n"
        + "from \"wikiticker\"\n"
        + "group by floor(\"__time\" to DAY)";
    final String explain = "PLAN="
        + "EnumerableInterpreter\n"
        + " BindableProject(EXPR$0=[$1])\n"
        + " DruidQuery(table=[[wiki, wikiticker]], intervals=[[1900-01-01T00:00:00.000/3000-01-01T00:00:00.000]], projects=[[FLOOR($0, FLAG(DAY)), $1]], groups=[{0}], aggs=[[SUM($1)]])\n";
    final String druidQuery = "{'queryType':'timeseries',"
        + "'dataSource':'wikiticker','descending':false,'granularity':'day',"
        + "'aggregations':[{'type':'longSum','name':'EXPR$0','fieldName':'added'}],"
        + "'intervals':['1900-01-01T00:00:00.000/3000-01-01T00:00:00.000'],"
        + "'context':{'skipEmptyBuckets':true}}";
    sql(sql, WIKI_AUTO2)
        .explainContains(explain)
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** A plain projection of the default time column with LIMIT maps to a
   * Druid "select" query with a paging threshold. */
  @Test public void testSelectTimestampColumnNoTables2() {
    // Since columns are not explicitly declared, we use the default time
    // column in the query.
    final String sql = "select \"__time\"\n"
        + "from \"wikiticker\"\n"
        + "limit 1\n";
    final String explain = "PLAN="
        + "EnumerableInterpreter\n"
        + " DruidQuery(table=[[wiki, wikiticker]], intervals=[[1900-01-01T00:00:00.000/3000-01-01T00:00:00.000]], projects=[[$0]], fetch=[1])\n";
    // fetch=[1] is pushed down as pagingSpec threshold 1 with
    // 'druid.query.fetch':true.
    final String druidQuery = "{'queryType':'select',"
        + "'dataSource':'wikiticker','descending':false,"
        + "'intervals':['1900-01-01T00:00:00.000/3000-01-01T00:00:00.000'],"
        + "'dimensions':[],'metrics':[],'granularity':'all','pagingSpec':{'threshold':1,'fromNext':true},"
        + "'context':{'druid.query.fetch':true}}";
    sql(sql, WIKI_AUTO2)
        .returnsUnordered("__time=2015-09-12 00:46:58")
        .explainContains(explain)
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** As {@link #testSelectTimestampColumnNoTables1()}, but also projecting
   * the floored time column, so no BindableProject is needed. */
  @Test public void testSelectTimestampColumnNoTables3() {
    // Since columns are not explicitly declared, we use the default time
    // column in the query.
    final String sql = "select floor(\"__time\" to DAY) as \"day\", sum(\"added\")\n"
        + "from \"wikiticker\"\n"
        + "group by floor(\"__time\" to DAY)";
    final String explain = "PLAN="
        + "EnumerableInterpreter\n"
        + " DruidQuery(table=[[wiki, wikiticker]], intervals=[[1900-01-01T00:00:00.000/3000-01-01T00:00:00.000]], projects=[[FLOOR($0, FLAG(DAY)), $1]], groups=[{0}], aggs=[[SUM($1)]])\n";
    final String druidQuery = "{'queryType':'timeseries',"
        + "'dataSource':'wikiticker','descending':false,'granularity':'day',"
        + "'aggregations':[{'type':'longSum','name':'EXPR$1','fieldName':'added'}],"
        + "'intervals':['1900-01-01T00:00:00.000/3000-01-01T00:00:00.000'],"
        + "'context':{'skipEmptyBuckets':true}}";
    sql(sql, WIKI_AUTO2)
        .returnsUnordered("day=2015-09-12 00:00:00; EXPR$1=9385573")
        .explainContains(explain)
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** Grouping on a dimension plus floored time, sorted by the aggregate,
   * maps to a Druid "groupBy" query with a numeric-ordered limitSpec. */
  @Test public void testSelectTimestampColumnNoTables4() {
    // Since columns are not explicitly declared, we use the default time
    // column in the query.
    final String sql = "select sum(\"added\") as \"s\", \"page\", "
        + "floor(\"__time\" to DAY) as \"day\"\n"
        + "from \"wikiticker\"\n"
        + "group by \"page\", floor(\"__time\" to DAY)\n"
        + "order by \"s\" desc";
    final String explain = "PLAN=EnumerableInterpreter\n"
        + " BindableProject(s=[$2], page=[$0], day=[$1])\n"
        + " DruidQuery(table=[[wiki, wikiticker]], "
        + "intervals=[[1900-01-01T00:00:00.000/3000-01-01T00:00:00.000]], projects=[[$17, FLOOR"
        + "($0, FLAG(DAY)), $1]], groups=[{0, 1}], aggs=[[SUM($2)]], sort0=[2], dir0=[DESC])";
    sql(sql, WIKI_AUTO2)
        .limit(1)
        .returnsUnordered("s=199818; page=User:QuackGuru/Electronic cigarettes 1; "
            + "day=2015-09-12 00:00:00")
        .explainContains(explain)
        .queryContains(
            druidChecker("'queryType':'groupBy'", "'limitSpec':{'type':'default',"
                + "'columns':[{'dimension':'s','direction':'descending','dimensionOrder':'numeric'}]}"));
  }
| |
  /** Verifies that timeseries queries carry 'skipEmptyBuckets':true, so
   * Druid does not emit zero rows for empty time buckets. */
  @Test public void testSkipEmptyBuckets() {
    final String sql = "select floor(\"__time\" to SECOND) as \"second\", sum(\"added\")\n"
        + "from \"wikiticker\"\n"
        + "where \"page\" = 'Jeremy Corbyn'\n"
        + "group by floor(\"__time\" to SECOND)";
    final String druidQuery = "{'queryType':'timeseries',"
        + "'dataSource':'wikiticker','descending':false,'granularity':'second',"
        + "'filter':{'type':'selector','dimension':'page','value':'Jeremy Corbyn'},"
        + "'aggregations':[{'type':'longSum','name':'EXPR$1','fieldName':'added'}],"
        + "'intervals':['1900-01-01T00:00:00.000/3000-01-01T00:00:00.000'],"
        + "'context':{'skipEmptyBuckets':true}}";
    sql(sql, WIKI_AUTO2)
        .limit(1)
        // Result without 'skipEmptyBuckets':true => "second=2015-09-12 00:46:58; EXPR$1=0"
        .returnsUnordered("second=2015-09-12 01:20:19; EXPR$1=1075")
        .queryContains(druidChecker(druidQuery));
  }
| |
| private CalciteAssert.AssertQuery checkSelectDistinctWiki(URL url, String tableName) { |
| final String sql = "select distinct \"countryName\"\n" |
| + "from \"" + tableName + "\"\n" |
| + "where \"page\" = 'Jeremy Corbyn'"; |
| final String druidQuery = "{'queryType':'groupBy'," |
| + "'dataSource':'wikiticker','granularity':'all'," |
| + "'dimensions':[{'type':'default','dimension':'countryName'}],'limitSpec':{'type':'default'}," |
| + "'filter':{'type':'selector','dimension':'page','value':'Jeremy Corbyn'}," |
| + "'aggregations':[{'type':'longSum','name':'dummy_agg','fieldName':'dummy_agg'}]," |
| + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}"; |
| return sql(sql, url) |
| .returnsUnordered("countryName=United Kingdom", |
| "countryName=null") |
| .queryContains(druidChecker(druidQuery)); |
| } |
| |
  /** Test case for
   * <a href="https://issues.apache.org/jira/browse/CALCITE-1617">[CALCITE-1617]
   * Druid adapter: Send timestamp literals to Druid as local time, not
   * UTC</a>. */
  @Test public void testFilterTime() {
    final String sql = "select \"__time\"\n"
        + "from \"wikiticker\"\n"
        + "where \"__time\" < '2015-10-12 00:00:00'";
    // The time filter is folded into the query interval's upper bound rather
    // than becoming a Druid filter.
    final String explain = "PLAN="
        + "EnumerableInterpreter\n"
        + " DruidQuery(table=[[wiki, wikiticker]], "
        + "intervals=[[1900-01-01T00:00:00.000/2015-10-12T00:00:00.000]], "
        + "projects=[[$0]])\n";
    final String druidQuery = "{'queryType':'select',"
        + "'dataSource':'wikiticker','descending':false,"
        + "'intervals':['1900-01-01T00:00:00.000/2015-10-12T00:00:00.000'],"
        + "'dimensions':[],'metrics':[],'granularity':'all',"
        + "'pagingSpec':{'threshold':16384,'fromNext':true},"
        + "'context':{'druid.query.fetch':false}}";
    sql(sql, WIKI_AUTO2)
        .limit(2)
        .returnsUnordered("__time=2015-09-12 00:46:58",
            "__time=2015-09-12 00:47:00")
        .explainContains(explain)
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** As {@link #testFilterTime()}, but with DISTINCT, so the time column is
   * grouped via an extraction dimension in a Druid "groupBy" query. */
  @Test public void testFilterTimeDistinct() {
    final String sql = "select distinct \"__time\"\n"
        + "from \"wikiticker\"\n"
        + "where \"__time\" < '2015-10-12 00:00:00'";
    final String explain = "PLAN="
        + "EnumerableInterpreter\n"
        + " DruidQuery(table=[[wiki, wikiticker]], "
        + "intervals=[[1900-01-01T00:00:00.000/2015-10-12T00:00:00.000]], "
        + "groups=[{0}], aggs=[[]])\n";
    // Only a prefix of the generated JSON is checked; the extractionFn's
    // format details are not pinned down.
    final String subDruidQuery = "{'queryType':'groupBy','dataSource':'wikiticker',"
        + "'granularity':'all','dimensions':[{'type':'extraction',"
        + "'dimension':'__time','outputName':'extract',"
        + "'extractionFn':{'type':'timeFormat'";
    sql(sql, WIKI_AUTO2)
        .limit(2)
        .returnsUnordered("__time=2015-09-12 00:46:58",
            "__time=2015-09-12 00:47:00")
        .explainContains(explain)
        .queryContains(druidChecker(subDruidQuery));
  }
| |
  /** Checks the JDBC column metadata reported for the "foodmart" table:
   * counts of each distinct SQL type name across all columns. */
  @Test public void testMetadataColumns() throws Exception {
    sql("values 1")
        .withConnection(
            new Function<Connection, Void>() {
              public Void apply(Connection c) {
                try {
                  final DatabaseMetaData metaData = c.getMetaData();
                  final ResultSet r =
                      metaData.getColumns(null, null, "foodmart", null);
                  // Multimap from type name to one entry per column of that type.
                  Multimap<String, Boolean> map = ArrayListMultimap.create();
                  while (r.next()) {
                    map.put(r.getString("TYPE_NAME"), true);
                  }
                  // 1 timestamp, 2 float measure, 1 int measure, 88 dimensions
                  assertThat(map.keySet().size(), is(4));
                  assertThat(map.values().size(), is(92));
                  assertThat(map.get("TIMESTAMP(0)").size(), is(1));
                  assertThat(map.get("DOUBLE").size(), is(2));
                  assertThat(map.get("BIGINT").size(), is(1));
                  assertThat(map.get(VARCHAR_TYPE).size(), is(88));
                } catch (SQLException e) {
                  throw new RuntimeException(e);
                }
                return null;
              }
            });
  }
| |
  /** A simple "select distinct" on one dimension becomes a Druid "groupBy"
   * query with a single default dimension. */
  @Test public void testSelectDistinct() {
    final String explain = "PLAN="
        + "EnumerableInterpreter\n"
        + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], groups=[{30}], aggs=[[]])";
    final String sql = "select distinct \"state_province\" from \"foodmart\"";
    final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart','granularity':'all',"
        + "'dimensions':[{'type':'default','dimension':'state_province'}],'limitSpec':{'type':'default'},"
        + "'aggregations':[{'type':'longSum','name':'dummy_agg','fieldName':'dummy_agg'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    sql(sql)
        .returnsUnordered("state_province=CA",
            "state_province=OR",
            "state_province=WA")
        .explainContains(explain)
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** Grouped SUM over a CAST column; currently disabled — see the
   * {@code @Ignore} reason. */
  @Ignore("TODO: fix invalid cast from Integer to Long")
  @Test public void testSelectGroupBySum() {
    final String explain = "PLAN="
        + "EnumerableInterpreter\n"
        + " DruidQuery(table=[[foodmart, foodmart]], projects=[[$29, CAST($88):INTEGER]], groups=[{0}], aggs=[[SUM($1)]])";
    final String sql = "select \"state_province\", sum(cast(\"unit_sales\" as integer)) as u\n"
        + "from \"foodmart\"\n"
        + "group by \"state_province\"";
    sql(sql)
        .returnsUnordered("state_province=CA; U=74748",
            "state_province=OR; U=67659",
            "state_province=WA; U=124366")
        .explainContains(explain);
  }
| |
| @Test public void testGroupbyMetric() { |
| final String sql = "select \"store_sales\" ,\"product_id\" from \"foodmart\" " |
| + "where \"product_id\" = 1020" + "group by \"store_sales\" ,\"product_id\" "; |
| final String plan = "PLAN=EnumerableInterpreter\n BindableAggregate(group=[{0, 1}])\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], " |
| + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], filter=[=($1, 1020)]," |
| + " projects=[[$90, $1]])\n"; |
| final String druidQuery = "{'queryType':'select','dataSource':'foodmart','descending':false," |
| + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']," |
| + "'filter':{'type':'selector','dimension':'product_id','value':'1020'}," |
| + "'dimensions':['product_id'],'metrics':['store_sales'],'granularity':'all'," |
| + "'pagingSpec':{'threshold':16384,'fromNext':true}," |
| + "'context':{'druid.query.fetch':false}}"; |
| sql(sql) |
| .explainContains(plan) |
| .queryContains(druidChecker(druidQuery)) |
| .returnsUnordered("store_sales=0.5099999904632568; product_id=1020", |
| "store_sales=1.0199999809265137; product_id=1020", |
| "store_sales=1.5299999713897705; product_id=1020", |
| "store_sales=2.0399999618530273; product_id=1020", |
| "store_sales=2.549999952316284; product_id=1020"); |
| } |
| |
  /** A filtered GROUP BY on a single dimension is pushed entirely into a
   * Druid "groupBy" query. */
  @Test public void testPushSimpleGroupBy() {
    final String sql = "select \"product_id\" from \"foodmart\" where "
        + "\"product_id\" = 1020 group by \"product_id\"";
    final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all','dimensions':[{'type':'default',"
        + "'dimension':'product_id'}],"
        + "'limitSpec':{'type':'default'},'filter':{'type':'selector',"
        + "'dimension':'product_id','value':'1020'},"
        + "'aggregations':[{'type':'longSum','name':'dummy_agg',"
        + "'fieldName':'dummy_agg'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    sql(sql).queryContains(druidChecker(druidQuery)).returnsUnordered("product_id=1020");
  }
| |
  /** As {@link #testPushSimpleGroupBy()}, but with the filter inside a
   * sub-query; the whole plan still collapses to one Druid "groupBy". */
  @Test public void testComplexPushGroupBy() {
    final String innerQuery = "select \"product_id\" as \"id\" from \"foodmart\" where "
        + "\"product_id\" = 1020";
    final String sql = "select \"id\" from (" + innerQuery + ") group by \"id\"";
    final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all',"
        + "'dimensions':[{'type':'default','dimension':'product_id'}],"
        + "'limitSpec':{'type':'default'},"
        + "'filter':{'type':'selector','dimension':'product_id','value':'1020'},"
        + "'aggregations':[{'type':'longSum','name':'dummy_agg','fieldName':'dummy_agg'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    sql(sql)
        .returnsUnordered("id=1020")
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** Test case for
   * <a href="https://issues.apache.org/jira/browse/CALCITE-1281">[CALCITE-1281]
   * Druid adapter wrongly returns all numeric values as int or float</a>. */
  @Test public void testSelectCount() {
    final String sql = "select count(*) as c from \"foodmart\"";
    sql(sql)
        .returns(new Function<ResultSet, Void>() {
          public Void apply(ResultSet input) {
            try {
              assertThat(input.next(), is(true));
              // The count must be readable as int, long and String alike.
              assertThat(input.getInt(1), is(86829));
              assertThat(input.getLong(1), is(86829L));
              assertThat(input.getString(1), is("86829"));
              assertThat(input.wasNull(), is(false));
              assertThat(input.next(), is(false));
              return null;
            } catch (SQLException e) {
              throw new RuntimeException(e);
            }
          }
        });
  }
| |
  /** A mixed ASC/DESC sort over grouped dimensions is pushed down as a
   * Druid "groupBy" limitSpec with alphanumeric dimension ordering. */
  @Test public void testSort() {
    final String explain = "PLAN=EnumerableInterpreter\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[$39, $30]], "
        + "groups=[{0, 1}], aggs=[[]], sort0=[1], sort1=[0], dir0=[ASC], dir1=[DESC])";
    final String sql = "select distinct \"gender\", \"state_province\"\n"
        + "from \"foodmart\" order by 2, 1 desc";
    sql(sql)
        .returnsOrdered("gender=M; state_province=CA",
            "gender=F; state_province=CA",
            "gender=M; state_province=OR",
            "gender=F; state_province=OR",
            "gender=M; state_province=WA",
            "gender=F; state_province=WA")
        .queryContains(
            druidChecker("{'queryType':'groupBy','dataSource':'foodmart',"
                + "'granularity':'all','dimensions':[{'type':'default',"
                + "'dimension':'gender'},{'type':'default',"
                + "'dimension':'state_province'}],'limitSpec':{'type':'default',"
                + "'columns':[{'dimension':'state_province','direction':'ascending',"
                + "'dimensionOrder':'alphanumeric'},{'dimension':'gender',"
                + "'direction':'descending','dimensionOrder':'alphanumeric'}]},"
                + "'aggregations':[{'type':'longSum','name':'dummy_agg',"
                + "'fieldName':'dummy_agg'}],"
                + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}"))
        .explainContains(explain);
  }
| |
  /** As {@link #testSort()}, but with OFFSET/FETCH; the sort is pushed to
   * Druid while the offset/fetch stays in an EnumerableLimit. */
  @Test public void testSortLimit() {
    final String explain = "PLAN=EnumerableLimit(offset=[2], fetch=[3])\n"
        + " EnumerableInterpreter\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[$39, $30]], "
        + "groups=[{0, 1}], aggs=[[]], sort0=[1], sort1=[0], dir0=[ASC], dir1=[DESC])";
    final String sql = "select distinct \"gender\", \"state_province\"\n"
        + "from \"foodmart\"\n"
        + "order by 2, 1 desc offset 2 rows fetch next 3 rows only";
    sql(sql)
        .returnsOrdered("gender=M; state_province=OR",
            "gender=F; state_province=OR",
            "gender=M; state_province=WA")
        .explainContains(explain);
  }
| |
  /** Checks that OFFSET/FETCH on a raw scan does NOT alter the Druid
   * "select" query's paging threshold. */
  @Test public void testOffsetLimit() {
    // We do not yet push LIMIT into a Druid "select" query as a "threshold".
    // It is not possible to push OFFSET into Druid "select" query.
    final String sql = "select \"state_province\", \"product_name\"\n"
        + "from \"foodmart\"\n"
        + "offset 2 fetch next 3 rows only";
    final String druidQuery = "{'queryType':'select','dataSource':'foodmart',"
        + "'descending':false,'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000'],"
        + "'dimensions':['state_province','product_name'],'metrics':[],'granularity':'all',"
        + "'pagingSpec':{'threshold':16384,'fromNext':true},'context':{'druid.query.fetch':false}}";
    sql(sql)
        .runs()
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** A plain FETCH (no OFFSET) on a raw scan IS pushed as the "select"
   * query's paging threshold, with 'druid.query.fetch':true. */
  @Test public void testLimit() {
    final String sql = "select \"gender\", \"state_province\"\n"
        + "from \"foodmart\" fetch next 3 rows only";
    final String druidQuery = "{'queryType':'select','dataSource':'foodmart',"
        + "'descending':false,'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000'],"
        + "'dimensions':['gender','state_province'],'metrics':[],'granularity':'all',"
        + "'pagingSpec':{'threshold':3,'fromNext':true},'context':{'druid.query.fetch':true}}";
    sql(sql)
        .runs()
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** A FETCH on a DISTINCT query is pushed as the "groupBy" limitSpec's
   * 'limit' with no sort columns. */
  @Test public void testDistinctLimit() {
    final String sql = "select distinct \"gender\", \"state_province\"\n"
        + "from \"foodmart\" fetch next 3 rows only";
    final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all','dimensions':[{'type':'default','dimension':'gender'},"
        + "{'type':'default','dimension':'state_province'}],'limitSpec':{'type':'default',"
        + "'limit':3,'columns':[]},"
        + "'aggregations':[{'type':'longSum','name':'dummy_agg','fieldName':'dummy_agg'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    final String explain = "PLAN=EnumerableInterpreter\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[$39, $30]], "
        + "groups=[{0, 1}], aggs=[[]], fetch=[3])";
    sql(sql)
        .runs()
        .explainContains(explain)
        .queryContains(druidChecker(druidQuery))
        .returnsUnordered("gender=F; state_province=CA", "gender=F; state_province=OR",
            "gender=F; state_province=WA");
  }
| |
  /** Test case for
   * <a href="https://issues.apache.org/jira/browse/CALCITE-1578">[CALCITE-1578]
   * Druid adapter: wrong semantics of topN query limit with granularity</a>. */
  @Test public void testGroupBySortLimit() {
    final String sql = "select \"brand_name\", \"gender\", sum(\"unit_sales\") as s\n"
        + "from \"foodmart\"\n"
        + "group by \"brand_name\", \"gender\"\n"
        + "order by s desc limit 3";
    // Two grouped dimensions, so a "groupBy" (not "topN") query is used, with
    // the sort/limit expressed via limitSpec.
    final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all','dimensions':[{'type':'default',"
        + "'dimension':'brand_name'},{'type':'default','dimension':'gender'}],"
        + "'limitSpec':{'type':'default','limit':3,'columns':[{'dimension':'S',"
        + "'direction':'descending','dimensionOrder':'numeric'}]},"
        + "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    final String explain = "PLAN=EnumerableInterpreter\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], "
        + "groups=[{2, 39}], aggs=[[SUM($89)]], sort0=[2], dir0=[DESC], fetch=[3])\n";
    sql(sql)
        .runs()
        .returnsOrdered("brand_name=Hermanos; gender=M; S=4286",
            "brand_name=Hermanos; gender=F; S=4183",
            "brand_name=Tell Tale; gender=F; S=4033")
        .explainContains(explain)
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** Test case for
   * <a href="https://issues.apache.org/jira/browse/CALCITE-1587">[CALCITE-1587]
   * Druid adapter: topN returns approximate results</a>. */
  @Test public void testGroupBySingleSortLimit() {
    // Exact mode: expects a "groupBy" query rather than an approximate "topN".
    checkGroupBySingleSortLimit(false);
  }
| |
  /** As {@link #testGroupBySingleSortLimit}, but allowing approximate results
   * due to {@link CalciteConnectionConfig#approximateDistinctCount()}.
   * Therefore we send a "topN" query to Druid. */
  @Test public void testGroupBySingleSortLimitApprox() {
    checkGroupBySingleSortLimit(true);
  }
| |
  /** Shared body for {@link #testGroupBySingleSortLimit()} and
   * {@link #testGroupBySingleSortLimitApprox()}.
   *
   * @param approx whether APPROXIMATE_TOP_N is enabled on the connection;
   *   if true, Druid receives a "topN" query, otherwise a "groupBy" */
  private void checkGroupBySingleSortLimit(boolean approx) {
    final String sql = "select \"brand_name\", sum(\"unit_sales\") as s\n"
        + "from \"foodmart\"\n"
        + "group by \"brand_name\"\n"
        + "order by s desc limit 3";
    final String approxDruid = "{'queryType':'topN','dataSource':'foodmart','granularity':'all',"
        + "'dimension':{'type':'default','dimension':'brand_name'},'metric':'S',"
        + "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000'],"
        + "'threshold':3}";
    final String exactDruid = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all','dimensions':[{'type':'default',"
        + "'dimension':'brand_name'}],'limitSpec':{'type':'default','limit':3,"
        + "'columns':[{'dimension':'S','direction':'descending',"
        + "'dimensionOrder':'numeric'}]},'aggregations':[{'type':'longSum',"
        + "'name':'S','fieldName':'unit_sales'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    final String druidQuery = approx ? approxDruid : exactDruid;
    final String explain = "PLAN=EnumerableInterpreter\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], "
        + "groups=[{2}], aggs=[[SUM($89)]], sort0=[1], dir0=[DESC], fetch=[3])\n";
    // Built directly with CalciteAssert (rather than sql()) so that the
    // APPROXIMATE_TOP_N connection property can be set per call.
    CalciteAssert.that()
        .enable(enabled())
        .with(ImmutableMap.of("model", FOODMART.getPath()))
        .with(CalciteConnectionProperty.APPROXIMATE_TOP_N.name(), approx)
        .query(sql)
        .runs()
        .returnsOrdered("brand_name=Hermanos; S=8469",
            "brand_name=Tell Tale; S=7877",
            "brand_name=Ebony; S=7438")
        .explainContains(explain)
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** Test case for
   * <a href="https://issues.apache.org/jira/browse/CALCITE-1578">[CALCITE-1578]
   * Druid adapter: wrong semantics of groupBy query limit with granularity</a>.
   *
   * <p>Before CALCITE-1578 was fixed, this would use a "topN" query but return
   * the wrong results. */
  @Test public void testGroupByDaySortDescLimit() {
    final String sql = "select \"brand_name\", floor(\"timestamp\" to DAY) as d,"
        + " sum(\"unit_sales\") as s\n"
        + "from \"foodmart\"\n"
        + "group by \"brand_name\", floor(\"timestamp\" to DAY)\n"
        + "order by s desc limit 30";
    // NOTE(review): this druidQuery constant (day granularity, no limit) is
    // declared but never asserted; the queryContains below checks different
    // fragments ('granularity':'all' with a limitSpec).
    final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'day','dimensions':[{'type':'default','dimension':'brand_name'}],"
        + "'limitSpec':{'type':'default'},"
        + "'aggregations':[{'type':'longSum','name':'S','fieldName':'unit_sales'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    final String explain = "PLAN=EnumerableInterpreter\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[$2, FLOOR"
        + "($0, FLAG(DAY)), $89]], groups=[{0, 1}], aggs=[[SUM($2)]], sort0=[2], dir0=[DESC], "
        + "fetch=[30])";
    sql(sql)
        .runs()
        .returnsStartingWith("brand_name=Ebony; D=1997-07-27 00:00:00; S=135",
            "brand_name=Tri-State; D=1997-05-09 00:00:00; S=120",
            "brand_name=Hermanos; D=1997-05-09 00:00:00; S=115")
        .explainContains(explain)
        .queryContains(
            druidChecker("'queryType':'groupBy'", "'granularity':'all'", "'limitSpec"
                + "':{'type':'default','limit':30,'columns':[{'dimension':'S',"
                + "'direction':'descending','dimensionOrder':'numeric'}]}"));
  }
| |
  /** Test case for
   * <a href="https://issues.apache.org/jira/browse/CALCITE-1579">[CALCITE-1579]
   * Druid adapter: wrong semantics of groupBy query limit with
   * granularity</a>.
   *
   * <p>Before CALCITE-1579 was fixed, this would use a "groupBy" query but
   * wrongly try to use a {@code limitSpec} to sort and filter. (A "topN" query
   * was not possible because the sort was {@code ASC}.) */
  @Test public void testGroupByDaySortLimit() {
    final String sql = "select \"brand_name\", floor(\"timestamp\" to DAY) as d,"
        + " sum(\"unit_sales\") as s\n"
        + "from \"foodmart\"\n"
        + "group by \"brand_name\", floor(\"timestamp\" to DAY)\n"
        + "order by s desc limit 30";
    // The day floor becomes an extraction dimension (part 1); the sort/limit
    // becomes a limitSpec (part 2). Both fragments are checked.
    final String druidQueryPart1 = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all','dimensions':[{'type':'default',"
        + "'dimension':'brand_name'},{'type':'extraction','dimension':'__time',"
        + "'outputName':'floor_day','extractionFn':{'type':'timeFormat'";
    final String druidQueryPart2 = "'limitSpec':{'type':'default','limit':30,"
        + "'columns':[{'dimension':'S','direction':'descending',"
        + "'dimensionOrder':'numeric'}]},'aggregations':[{'type':'longSum',"
        + "'name':'S','fieldName':'unit_sales'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    final String explain = "PLAN=EnumerableInterpreter\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[$2, FLOOR"
        + "($0, FLAG(DAY)), $89]], groups=[{0, 1}], aggs=[[SUM($2)]], sort0=[2], dir0=[DESC], "
        + "fetch=[30])";
    sql(sql)
        .runs()
        .returnsStartingWith("brand_name=Ebony; D=1997-07-27 00:00:00; S=135",
            "brand_name=Tri-State; D=1997-05-09 00:00:00; S=120",
            "brand_name=Hermanos; D=1997-05-09 00:00:00; S=115")
        .explainContains(explain)
        .queryContains(druidChecker(druidQueryPart1, druidQueryPart2));
  }
| |
| /** Test case for |
| * <a href="https://issues.apache.org/jira/browse/CALCITE-1580">[CALCITE-1580] |
| * Druid adapter: Wrong semantics for ordering within groupBy queries</a>. */ |
| @Test public void testGroupByDaySortDimension() { |
| final String sql = "select \"brand_name\", floor(\"timestamp\" to DAY) as d," |
| + " sum(\"unit_sales\") as s\n" |
| + "from \"foodmart\"\n" |
| + "group by \"brand_name\", floor(\"timestamp\" to DAY)\n" |
| + "order by \"brand_name\""; |
| final String subDruidQuery = "{'queryType':'groupBy','dataSource':'foodmart'," |
| + "'granularity':'all','dimensions':[{'type':'default'," |
| + "'dimension':'brand_name'},{'type':'extraction','dimension':'__time'," |
| + "'outputName':'floor_day','extractionFn':{'type':'timeFormat'"; |
| final String explain = "PLAN=EnumerableInterpreter\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], " |
| + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[$2, FLOOR" |
| + "($0, FLAG(DAY)), $89]], groups=[{0, 1}], aggs=[[SUM($2)]], sort0=[0], dir0=[ASC])"; |
| sql(sql) |
| .runs() |
| .returnsStartingWith("brand_name=ADJ; D=1997-01-11 00:00:00; S=2", |
| "brand_name=ADJ; D=1997-01-12 00:00:00; S=3", |
| "brand_name=ADJ; D=1997-01-17 00:00:00; S=3") |
| .explainContains(explain) |
| .queryContains(druidChecker(subDruidQuery)); |
| } |
| |
  /** Tests a query that contains no GROUP BY and is therefore executed as a
   * Druid "select" query. */
  @Test public void testFilterSortDesc() {
    final String sql = "select * from \"foodmart\"\n"
        + "where \"product_id\" BETWEEN '1500' AND '1502'\n"
        + "order by \"state_province\" desc, \"product_id\"";
    // Expected Druid "select" query. The BETWEEN on string literals becomes
    // two "bound" filters with 'ordering':'lexicographic'; the scan lists
    // every dimension and metric of the table.
    // NOTE(review): the ORDER BY does not appear anywhere in the expected
    // Druid query, so the sort is presumably evaluated by Calcite above the
    // scan -- confirm.
    final String druidQuery = "{'queryType':'select','dataSource':'foodmart',"
        + "'descending':false,'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000'],"
        + "'filter':{'type':'and','fields':["
        + "{'type':'bound','dimension':'product_id','lower':'1500','lowerStrict':false,'ordering':'lexicographic'},"
        + "{'type':'bound','dimension':'product_id','upper':'1502','upperStrict':false,'ordering':'lexicographic'}]},"
        + "'dimensions':['product_id','brand_name','product_name','SKU','SRP','gross_weight','net_weight',"
        + "'recyclable_package','low_fat','units_per_case','cases_per_pallet','shelf_width','shelf_height',"
        + "'shelf_depth','product_class_id','product_subcategory','product_category','product_department',"
        + "'product_family','customer_id','account_num','lname','fname','mi','address1','address2','address3',"
        + "'address4','city','state_province','postal_code','country','customer_region_id','phone1','phone2',"
        + "'birthdate','marital_status','yearly_income','gender','total_children','num_children_at_home',"
        + "'education','date_accnt_opened','member_card','occupation','houseowner','num_cars_owned',"
        + "'fullname','promotion_id','promotion_district_id','promotion_name','media_type','cost','start_date',"
        + "'end_date','store_id','store_type','region_id','store_name','store_number','store_street_address',"
        + "'store_city','store_state','store_postal_code','store_country','store_manager','store_phone',"
        + "'store_fax','first_opened_date','last_remodel_date','store_sqft','grocery_sqft','frozen_sqft',"
        + "'meat_sqft','coffee_bar','video_store','salad_bar','prepared_food','florist','time_id','the_day',"
        + "'the_month','the_year','day_of_month','week_of_year','month_of_year','quarter','fiscal_period'],"
        + "'metrics':['unit_sales','store_sales','store_cost'],'granularity':'all',"
        + "'pagingSpec':{'threshold':16384,'fromNext':true},'context':{'druid.query.fetch':false}}";
    sql(sql)
        .limit(4)
        .returns(
            new Function<ResultSet, Void>() {
              public Void apply(ResultSet resultSet) {
                try {
                  // Expect exactly four rows, all with the same product name,
                  // and no fifth row.
                  for (int i = 0; i < 4; i++) {
                    assertTrue(resultSet.next());
                    assertThat(resultSet.getString("product_name"),
                        is("Fort West Dried Apricots"));
                  }
                  assertFalse(resultSet.next());
                  return null;
                } catch (SQLException e) {
                  throw new RuntimeException(e);
                }
              }
            })
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** As {@link #testFilterSortDesc()} but the bounds are numeric. */
  @Test public void testFilterSortDescNumeric() {
    final String sql = "select * from \"foodmart\"\n"
        + "where \"product_id\" BETWEEN 1500 AND 1502\n"
        + "order by \"state_province\" desc, \"product_id\"";
    // Identical to testFilterSortDesc except that the numeric literals in the
    // BETWEEN produce "bound" filters with 'ordering':'numeric' instead of
    // 'lexicographic'.
    final String druidQuery = "{'queryType':'select','dataSource':'foodmart',"
        + "'descending':false,'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000'],"
        + "'filter':{'type':'and','fields':["
        + "{'type':'bound','dimension':'product_id','lower':'1500','lowerStrict':false,'ordering':'numeric'},"
        + "{'type':'bound','dimension':'product_id','upper':'1502','upperStrict':false,'ordering':'numeric'}]},"
        + "'dimensions':['product_id','brand_name','product_name','SKU','SRP','gross_weight','net_weight',"
        + "'recyclable_package','low_fat','units_per_case','cases_per_pallet','shelf_width','shelf_height',"
        + "'shelf_depth','product_class_id','product_subcategory','product_category','product_department',"
        + "'product_family','customer_id','account_num','lname','fname','mi','address1','address2','address3',"
        + "'address4','city','state_province','postal_code','country','customer_region_id','phone1','phone2',"
        + "'birthdate','marital_status','yearly_income','gender','total_children','num_children_at_home',"
        + "'education','date_accnt_opened','member_card','occupation','houseowner','num_cars_owned',"
        + "'fullname','promotion_id','promotion_district_id','promotion_name','media_type','cost','start_date',"
        + "'end_date','store_id','store_type','region_id','store_name','store_number','store_street_address',"
        + "'store_city','store_state','store_postal_code','store_country','store_manager','store_phone',"
        + "'store_fax','first_opened_date','last_remodel_date','store_sqft','grocery_sqft','frozen_sqft',"
        + "'meat_sqft','coffee_bar','video_store','salad_bar','prepared_food','florist','time_id','the_day',"
        + "'the_month','the_year','day_of_month','week_of_year','month_of_year','quarter','fiscal_period'],"
        + "'metrics':['unit_sales','store_sales','store_cost'],'granularity':'all',"
        + "'pagingSpec':{'threshold':16384,'fromNext':true},'context':{'druid.query.fetch':false}}";
    sql(sql)
        .limit(4)
        .returns(
            new Function<ResultSet, Void>() {
              public Void apply(ResultSet resultSet) {
                try {
                  // Expect exactly four rows, all with the same product name,
                  // and no fifth row.
                  for (int i = 0; i < 4; i++) {
                    assertTrue(resultSet.next());
                    assertThat(resultSet.getString("product_name"),
                        is("Fort West Dried Apricots"));
                  }
                  assertFalse(resultSet.next());
                  return null;
                } catch (SQLException e) {
                  throw new RuntimeException(e);
                }
              }
            })
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** Tests a query whose filter removes all rows. */
  @Test public void testFilterOutEverything() {
    final String sql = "select * from \"foodmart\"\n"
        + "where \"product_id\" = -1";
    // The equality predicate becomes a Druid "selector" filter whose value
    // ('-1') matches no row in the data set.
    final String druidQuery = "{'queryType':'select','dataSource':'foodmart',"
        + "'descending':false,'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000'],"
        + "'filter':{'type':'selector','dimension':'product_id','value':'-1'},"
        + "'dimensions':['product_id','brand_name','product_name','SKU','SRP',"
        + "'gross_weight','net_weight','recyclable_package','low_fat','units_per_case',"
        + "'cases_per_pallet','shelf_width','shelf_height','shelf_depth',"
        + "'product_class_id','product_subcategory','product_category',"
        + "'product_department','product_family','customer_id','account_num',"
        + "'lname','fname','mi','address1','address2','address3','address4',"
        + "'city','state_province','postal_code','country','customer_region_id',"
        + "'phone1','phone2','birthdate','marital_status','yearly_income','gender',"
        + "'total_children','num_children_at_home','education','date_accnt_opened',"
        + "'member_card','occupation','houseowner','num_cars_owned','fullname',"
        + "'promotion_id','promotion_district_id','promotion_name','media_type','cost',"
        + "'start_date','end_date','store_id','store_type','region_id','store_name',"
        + "'store_number','store_street_address','store_city','store_state',"
        + "'store_postal_code','store_country','store_manager','store_phone',"
        + "'store_fax','first_opened_date','last_remodel_date','store_sqft','grocery_sqft',"
        + "'frozen_sqft','meat_sqft','coffee_bar','video_store','salad_bar','prepared_food',"
        + "'florist','time_id','the_day','the_month','the_year','day_of_month',"
        + "'week_of_year','month_of_year','quarter','fiscal_period'],"
        + "'metrics':['unit_sales','store_sales','store_cost'],'granularity':'all',"
        + "'pagingSpec':{'threshold':16384,'fromNext':true},'context':{'druid.query.fetch':false}}";
    sql(sql)
        .limit(4)
        // No arguments: asserts that the result set is empty.
        .returnsUnordered()
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** As {@link #testFilterSortDescNumeric()} but with a filter that cannot
   * be pushed down to Druid. */
  @Test public void testNonPushableFilterSortDesc() {
    final String sql = "select * from \"foodmart\"\n"
        + "where cast(\"product_id\" as integer) - 1500 BETWEEN 0 AND 2\n"
        + "order by \"state_province\" desc, \"product_id\"";
    // Because the predicate involves CAST plus arithmetic, the expected Druid
    // query carries no 'filter' at all: the whole table is scanned and the
    // filtering (and sorting) happen in Calcite above the scan.
    final String druidQuery = "{'queryType':'select','dataSource':'foodmart',"
        + "'descending':false,'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000'],"
        + "'dimensions':['product_id','brand_name','product_name','SKU','SRP','gross_weight',"
        + "'net_weight','recyclable_package','low_fat','units_per_case','cases_per_pallet',"
        + "'shelf_width','shelf_height','shelf_depth','product_class_id','product_subcategory',"
        + "'product_category','product_department','product_family','customer_id','account_num',"
        + "'lname','fname','mi','address1','address2','address3','address4','city','state_province',"
        + "'postal_code','country','customer_region_id','phone1','phone2','birthdate','marital_status',"
        + "'yearly_income','gender','total_children','num_children_at_home','education',"
        + "'date_accnt_opened','member_card','occupation','houseowner','num_cars_owned','fullname',"
        + "'promotion_id','promotion_district_id','promotion_name','media_type','cost','start_date',"
        + "'end_date','store_id','store_type','region_id','store_name','store_number','store_street_address',"
        + "'store_city','store_state','store_postal_code','store_country','store_manager','store_phone',"
        + "'store_fax','first_opened_date','last_remodel_date','store_sqft','grocery_sqft','frozen_sqft',"
        + "'meat_sqft','coffee_bar','video_store','salad_bar','prepared_food','florist','time_id','the_day',"
        + "'the_month','the_year','day_of_month','week_of_year','month_of_year','quarter','fiscal_period'],"
        + "'metrics':['unit_sales','store_sales','store_cost'],'granularity':'all',"
        + "'pagingSpec':{'threshold':16384,'fromNext':true},'context':{'druid.query.fetch':false}}";
    sql(sql)
        .limit(4)
        .returns(
            new Function<ResultSet, Void>() {
              public Void apply(ResultSet resultSet) {
                try {
                  // Expect exactly four rows, all with the same product name,
                  // and no fifth row.
                  for (int i = 0; i < 4; i++) {
                    assertTrue(resultSet.next());
                    assertThat(resultSet.getString("product_name"),
                        is("Fort West Dried Apricots"));
                  }
                  assertFalse(resultSet.next());
                  return null;
                } catch (SQLException e) {
                  throw new RuntimeException(e);
                }
              }
            })
        .queryContains(druidChecker(druidQuery));
  }
| |
| @Test public void testUnionPlan() { |
| final String sql = "select distinct \"gender\" from \"foodmart\"\n" |
| + "union all\n" |
| + "select distinct \"marital_status\" from \"foodmart\""; |
| final String explain = "PLAN=" |
| + "EnumerableInterpreter\n" |
| + " BindableUnion(all=[true])\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], groups=[{39}], aggs=[[]])\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], groups=[{37}], aggs=[[]])"; |
| sql(sql) |
| .explainContains(explain) |
| .returnsUnordered( |
| "gender=F", |
| "gender=M", |
| "gender=M", |
| "gender=S"); |
| } |
| |
| @Test public void testFilterUnionPlan() { |
| final String sql = "select * from (\n" |
| + " select distinct \"gender\" from \"foodmart\"\n" |
| + " union all\n" |
| + " select distinct \"marital_status\" from \"foodmart\")\n" |
| + "where \"gender\" = 'M'"; |
| final String explain = "PLAN=" |
| + "EnumerableInterpreter\n" |
| + " BindableFilter(condition=[=($0, 'M')])\n" |
| + " BindableUnion(all=[true])\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], groups=[{39}], aggs=[[]])\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], groups=[{37}], aggs=[[]])"; |
| sql(sql) |
| .explainContains(explain) |
| .returnsUnordered("gender=M", |
| "gender=M"); |
| } |
| |
| @Test public void testCountGroupByEmpty() { |
| final String druidQuery = "{'queryType':'timeseries','dataSource':'foodmart'," |
| + "'descending':false,'granularity':'all'," |
| + "'aggregations':[{'type':'count','name':'EXPR$0'}]," |
| + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']," |
| + "'context':{'skipEmptyBuckets':true}}"; |
| final String explain = "PLAN=EnumerableInterpreter\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[]], groups=[{}], aggs=[[COUNT()]])"; |
| final String sql = "select count(*) from \"foodmart\""; |
| sql(sql) |
| .returns("EXPR$0=86829\n") |
| .queryContains(druidChecker(druidQuery)) |
| .explainContains(explain); |
| } |
| |
| @Test public void testGroupByOneColumnNotProjected() { |
| final String sql = "select count(*) as c from \"foodmart\"\n" |
| + "group by \"state_province\" order by 1"; |
| sql(sql) |
| .returnsOrdered("C=21610", |
| "C=24441", |
| "C=40778"); |
| } |
| |
| /** Unlike {@link #testGroupByTimeAndOneColumnNotProjected()}, we cannot use |
| * "topN" because we have a global limit, and that requires |
| * {@code granularity: all}. */ |
| @Test public void testGroupByTimeAndOneColumnNotProjectedWithLimit() { |
| final String sql = "select count(*) as \"c\", floor(\"timestamp\" to MONTH) as \"month\"\n" |
| + "from \"foodmart\"\n" |
| + "group by floor(\"timestamp\" to MONTH), \"state_province\"\n" |
| + "order by \"c\" desc limit 3"; |
| sql(sql) |
| .returnsOrdered("c=4070; month=1997-12-01 00:00:00", |
| "c=4033; month=1997-11-01 00:00:00", |
| "c=3511; month=1997-07-01 00:00:00") |
| .queryContains(druidChecker("'queryType':'groupBy'")); |
| } |
| |
| @Test public void testGroupByTimeAndOneMetricNotProjected() { |
| final String sql = |
| "select count(*) as \"c\", floor(\"timestamp\" to MONTH) as \"month\", floor" |
| + "(\"store_sales\") as sales\n" |
| + "from \"foodmart\"\n" |
| + "group by floor(\"timestamp\" to MONTH), \"state_province\", floor" |
| + "(\"store_sales\")\n" |
| + "order by \"c\" desc limit 3"; |
| sql(sql).returnsOrdered("c=494; month=1997-11-01 00:00:00; SALES=5.0", |
| "c=475; month=1997-12-01 00:00:00; SALES=5.0", |
| "c=468; month=1997-03-01 00:00:00; SALES=5.0" |
| ).queryContains(druidChecker("'queryType':'select'")); |
| } |
| |
| @Test public void testGroupByTimeAndOneColumnNotProjected() { |
| final String sql = "select count(*) as \"c\",\n" |
| + " floor(\"timestamp\" to MONTH) as \"month\"\n" |
| + "from \"foodmart\"\n" |
| + "group by floor(\"timestamp\" to MONTH), \"state_province\"\n" |
| + "having count(*) > 3500"; |
| sql(sql) |
| .returnsUnordered("c=3511; month=1997-07-01 00:00:00", |
| "c=4033; month=1997-11-01 00:00:00", |
| "c=4070; month=1997-12-01 00:00:00") |
| .queryContains(druidChecker("'queryType':'groupBy'")); |
| } |
| |
| @Test public void testOrderByOneColumnNotProjected() { |
| // Result including state: CA=24441, OR=21610, WA=40778 |
| final String sql = "select count(*) as c from \"foodmart\"\n" |
| + "group by \"state_province\" order by \"state_province\""; |
| sql(sql) |
| .returnsOrdered("C=24441", |
| "C=21610", |
| "C=40778"); |
| } |
| |
| @Test public void testGroupByOneColumn() { |
| final String sql = "select \"state_province\", count(*) as c\n" |
| + "from \"foodmart\"\n" |
| + "group by \"state_province\"\n" |
| + "order by \"state_province\""; |
| String explain = "PLAN=EnumerableInterpreter\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], " |
| + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], groups=[{30}], " |
| + "aggs=[[COUNT()]], sort0=[0], dir0=[ASC])"; |
| sql(sql) |
| .limit(2) |
| .returnsOrdered("state_province=CA; C=24441", |
| "state_province=OR; C=21610") |
| .explainContains(explain); |
| } |
| |
| @Test public void testGroupByOneColumnReversed() { |
| final String sql = "select count(*) as c, \"state_province\"\n" |
| + "from \"foodmart\"\n" |
| + "group by \"state_province\"\n" |
| + "order by \"state_province\""; |
| sql(sql) |
| .limit(2) |
| .returnsOrdered("C=24441; state_province=CA", |
| "C=21610; state_province=OR"); |
| } |
| |
| @Test public void testGroupByAvgSumCount() { |
| final String sql = "select \"state_province\",\n" |
| + " avg(\"unit_sales\") as a,\n" |
| + " sum(\"unit_sales\") as s,\n" |
| + " count(\"store_sqft\") as c,\n" |
| + " count(*) as c0\n" |
| + "from \"foodmart\"\n" |
| + "group by \"state_province\"\n" |
| + "order by 1"; |
| sql(sql) |
| .limit(2) |
| .returnsUnordered("state_province=CA; A=3; S=74748; C=16347; C0=24441", |
| "state_province=OR; A=3; S=67659; C=21610; C0=21610") |
| .queryContains(druidChecker("'queryType':'select'")); |
| } |
| |
| @Test public void testGroupByMonthGranularity() { |
| final String sql = "select sum(\"unit_sales\") as s,\n" |
| + " count(\"store_sqft\") as c\n" |
| + "from \"foodmart\"\n" |
| + "group by floor(\"timestamp\" to MONTH)"; |
| String druidQuery = "{'queryType':'select','dataSource':'foodmart'"; |
| sql(sql) |
| .limit(3) |
| .returnsUnordered("S=21081; C=5793", "S=23763; C=6762", "S=25270; C=7026") |
| .queryContains(druidChecker(druidQuery)); |
| } |
| |
  /** Test case for
   * <a href="https://issues.apache.org/jira/browse/CALCITE-1577">[CALCITE-1577]
   * Druid adapter: Incorrect result - limit on timestamp disappears</a>. */
  @Test public void testGroupByMonthGranularitySort() {
    final String sql = "select floor(\"timestamp\" to MONTH) as m,\n"
        + " sum(\"unit_sales\") as s,\n"
        + " count(\"store_sqft\") as c\n"
        + "from \"foodmart\"\n"
        + "group by floor(\"timestamp\" to MONTH)\n"
        + "order by floor(\"timestamp\" to MONTH) ASC";
    // Aggregation and sort are evaluated by Bindable operators; only the
    // three-column projection ($0, $71, $89) is pushed into the DruidQuery.
    final String explain = "PLAN=EnumerableInterpreter\n"
        + " BindableSort(sort0=[$0], dir0=[ASC])\n"
        + " BindableAggregate(group=[{0}], S=[SUM($1)], C=[COUNT($2)])\n"
        + " BindableProject(M=[FLOOR($0, FLAG(MONTH))], unit_sales=[$2], store_sqft=[$1])\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[$0, $71, $89]])";
    // All twelve months of 1997, in ascending order.
    sql(sql)
        .returnsOrdered("M=1997-01-01 00:00:00; S=21628; C=5957",
            "M=1997-02-01 00:00:00; S=20957; C=5842",
            "M=1997-03-01 00:00:00; S=23706; C=6528",
            "M=1997-04-01 00:00:00; S=20179; C=5523",
            "M=1997-05-01 00:00:00; S=21081; C=5793",
            "M=1997-06-01 00:00:00; S=21350; C=5863",
            "M=1997-07-01 00:00:00; S=23763; C=6762",
            "M=1997-08-01 00:00:00; S=21697; C=5915",
            "M=1997-09-01 00:00:00; S=20388; C=5591",
            "M=1997-10-01 00:00:00; S=19958; C=5606",
            "M=1997-11-01 00:00:00; S=25270; C=7026",
            "M=1997-12-01 00:00:00; S=26796; C=7338")
        .explainContains(explain);
  }
| |
| @Test public void testGroupByMonthGranularitySortLimit() { |
| final String sql = "select floor(\"timestamp\" to MONTH) as m,\n" |
| + " sum(\"unit_sales\") as s,\n" |
| + " count(\"store_sqft\") as c\n" |
| + "from \"foodmart\"\n" |
| + "group by floor(\"timestamp\" to MONTH)\n" |
| + "order by floor(\"timestamp\" to MONTH) limit 3"; |
| final String explain = "PLAN=EnumerableInterpreter\n" |
| + " BindableSort(sort0=[$0], dir0=[ASC], fetch=[3])\n" |
| + " BindableAggregate(group=[{0}], S=[SUM($1)], C=[COUNT($2)])\n" |
| + " BindableProject(M=[FLOOR($0, FLAG(MONTH))], unit_sales=[$2], store_sqft=[$1])\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], " |
| + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[$0, $71, $89]])"; |
| sql(sql) |
| .returnsOrdered("M=1997-01-01 00:00:00; S=21628; C=5957", |
| "M=1997-02-01 00:00:00; S=20957; C=5842", |
| "M=1997-03-01 00:00:00; S=23706; C=6528") |
| .explainContains(explain); |
| } |
| |
| @Test public void testGroupByDayGranularity() { |
| final String sql = "select sum(\"unit_sales\") as s,\n" |
| + " count(\"store_sqft\") as c\n" |
| + "from \"foodmart\"\n" |
| + "group by floor(\"timestamp\" to DAY)"; |
| String druidQuery = "{'queryType':'select','dataSource':'foodmart'"; |
| sql(sql) |
| .limit(3) |
| .returnsUnordered("S=1244; C=391", "S=550; C=112", "S=580; C=171") |
| .queryContains(druidChecker(druidQuery)); |
| } |
| |
| @Test public void testGroupByMonthGranularityFiltered() { |
| final String sql = "select sum(\"unit_sales\") as s,\n" |
| + " count(\"store_sqft\") as c\n" |
| + "from \"foodmart\"\n" |
| + "where \"timestamp\" >= '1996-01-01 00:00:00' and " |
| + " \"timestamp\" < '1998-01-01 00:00:00'\n" |
| + "group by floor(\"timestamp\" to MONTH)"; |
| String druidQuery = "{'queryType':'select','dataSource':'foodmart'"; |
| sql(sql) |
| .limit(3) |
| .returnsUnordered("S=21081; C=5793", "S=23763; C=6762", "S=25270; C=7026") |
| .queryContains(druidChecker(druidQuery)); |
| } |
| |
  @Test public void testTopNMonthGranularity() {
    final String sql = "select sum(\"unit_sales\") as s,\n"
        + "max(\"unit_sales\") as m,\n"
        + "\"state_province\" as p\n"
        + "from \"foodmart\"\n"
        + "group by \"state_province\", floor(\"timestamp\" to MONTH)\n"
        + "order by s desc limit 3";
    // Cannot use a Druid "topN" query, granularity != "all";
    // have to use "groupBy" query followed by external Sort and fetch.
    final String explain = "PLAN=EnumerableInterpreter\n"
        + " BindableProject(S=[$2], M=[$3], P=[$0])\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[$30, FLOOR"
        + "($0, FLAG(MONTH)), $89]], groups=[{0, 1}], aggs=[[SUM($2), MAX($2)]], sort0=[2], "
        + "dir0=[DESC], fetch=[3])";
    // The month rollup is expressed as a timeFormat extraction on "__time" ...
    final String druidQueryPart1 = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all','dimensions':[{'type':'default',"
        + "'dimension':'state_province'},{'type':'extraction','dimension':'__time',"
        + "'outputName':'floor_month','extractionFn':{'type':'timeFormat','format'";
    // ... and the sort/fetch map to a limitSpec on the aggregate column "S".
    final String druidQueryPart2 = "'limitSpec':{'type':'default','limit':3,"
        + "'columns':[{'dimension':'S','direction':'descending',"
        + "'dimensionOrder':'numeric'}]},'aggregations':[{'type':'longSum',"
        + "'name':'S','fieldName':'unit_sales'},{'type':'longMax','name':'M',"
        + "'fieldName':'unit_sales'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    sql(sql)
        .returnsUnordered("S=12399; M=6; P=WA",
            "S=12297; M=7; P=WA",
            "S=10640; M=6; P=WA")
        .explainContains(explain)
        .queryContains(druidChecker(druidQueryPart1, druidQueryPart2));
  }
| |
  @Test public void testTopNDayGranularityFiltered() {
    final String sql = "select sum(\"unit_sales\") as s,\n"
        + "max(\"unit_sales\") as m,\n"
        + "\"state_province\" as p\n"
        + "from \"foodmart\"\n"
        + "where \"timestamp\" >= '1997-01-01 00:00:00' and "
        + " \"timestamp\" < '1997-09-01 00:00:00'\n"
        + "group by \"state_province\", floor(\"timestamp\" to DAY)\n"
        + "order by s desc limit 6";
    // The time-range predicate narrows the query's intervals; aggregation,
    // sort and fetch are all pushed into the DruidQuery.
    final String explain = "PLAN=EnumerableInterpreter\n"
        + " BindableProject(S=[$2], M=[$3], P=[$0])\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1997-01-01T00:00:00.000/1997-09-01T00:00:00.000]], projects=[[$30, FLOOR"
        + "($0, FLAG(DAY)), $89]], groups=[{0, 1}], aggs=[[SUM($2), MAX($2)]], sort0=[2], "
        + "dir0=[DESC], fetch=[6])";
    // Must be a "groupBy" query with granularity "all" ...
    final String druidQueryType = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all','dimensions'";
    // ... whose sort and fetch are expressed as a limitSpec on "S".
    final String limitSpec = "'limitSpec':{'type':'default','limit':6,"
        + "'columns':[{'dimension':'S','direction':'descending','dimensionOrder':'numeric'}]}";
    sql(sql)
        .returnsOrdered("S=2527; M=5; P=OR",
            "S=2525; M=6; P=OR",
            "S=2238; M=6; P=OR",
            "S=1715; M=5; P=OR",
            "S=1691; M=5; P=OR",
            "S=1629; M=5; P=WA")
        .explainContains(explain)
        .queryContains(druidChecker(druidQueryType, limitSpec));
  }
| |
| @Test public void testGroupByHaving() { |
| // Note: We don't push down HAVING yet |
| final String sql = "select \"state_province\" as s, count(*) as c\n" |
| + "from \"foodmart\"\n" |
| + "group by \"state_province\" having count(*) > 23000 order by 1"; |
| final String explain = "PLAN=" |
| + "EnumerableInterpreter\n" |
| + " BindableSort(sort0=[$0], dir0=[ASC])\n" |
| + " BindableFilter(condition=[>($1, 23000)])\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], groups=[{30}], aggs=[[COUNT()]])"; |
| sql(sql) |
| .returnsOrdered("S=CA; C=24441", |
| "S=WA; C=40778") |
| .explainContains(explain); |
| } |
| |
| @Test public void testGroupComposite() { |
| // Note: We don't push down SORT-LIMIT yet |
| final String sql = "select count(*) as c, \"state_province\", \"city\"\n" |
| + "from \"foodmart\"\n" |
| + "group by \"state_province\", \"city\"\n" |
| + "order by c desc limit 2"; |
| final String explain = "PLAN=" |
| + "EnumerableInterpreter\n" |
| + " BindableProject(C=[$2], state_province=[$1], city=[$0])\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], groups=[{29, 30}], aggs=[[COUNT()]], sort0=[2], dir0=[DESC], fetch=[2])"; |
| sql(sql) |
| .returnsOrdered("C=7394; state_province=WA; city=Spokane", |
| "C=3958; state_province=WA; city=Olympia") |
| .explainContains(explain); |
| } |
| |
  /** Tests that distinct-count is pushed down to Druid and evaluated using
   * "cardinality". The result is approximate, but gives the correct result in
   * this example when rounded down using FLOOR. */
  @Test public void testDistinctCount() {
    final String sql = "select \"state_province\",\n"
        + " floor(count(distinct \"city\")) as cdc\n"
        + "from \"foodmart\"\n"
        + "group by \"state_province\"\n"
        + "order by 2 desc limit 2";
    // NOTE(review): despite the Javadoc above, this expected plan evaluates
    // the distinct count with a BindableAggregate COUNT above a Druid
    // "groupBy" on {city, state_province}, and no "cardinality" aggregator
    // appears in the expected Druid query -- confirm whether the Javadoc is
    // stale.
    final String explain = "PLAN=EnumerableInterpreter\n"
        + " BindableSort(sort0=[$1], dir0=[DESC], fetch=[2])\n"
        + " BindableProject(state_province=[$0], CDC=[FLOOR($1)])\n"
        + " BindableAggregate(group=[{1}], agg#0=[COUNT($0)])\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], groups=[{29, 30}], "
        + "aggs=[[]])";
    // The aggregate-free groupBy carries a placeholder "dummy_agg" aggregator.
    final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all','dimensions':[{'type':'default','dimension':'city'},"
        + "{'type':'default','dimension':'state_province'}],"
        + "'limitSpec':{'type':'default'},'aggregations':[{'type':'longSum',"
        + "'name':'dummy_agg','fieldName':'dummy_agg'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    sql(sql)
        .explainContains(explain)
        .queryContains(druidChecker(druidQuery))
        .returnsUnordered("state_province=CA; CDC=45",
            "state_province=WA; CDC=22");
  }
| |
| /** Tests that projections of columns are pushed into the DruidQuery, and |
| * projections of expressions that Druid cannot handle (in this case, a |
| * literal 0) stay up. */ |
| @Test public void testProject() { |
| final String sql = "select \"product_name\", 0 as zero\n" |
| + "from \"foodmart\"\n" |
| + "order by \"product_name\""; |
| final String explain = "PLAN=" |
| + "EnumerableInterpreter\n" |
| + " BindableProject(product_name=[$0], ZERO=[0])\n" |
| + " BindableSort(sort0=[$0], dir0=[ASC])\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[$3]])"; |
| sql(sql) |
| .limit(2) |
| .explainContains(explain) |
| .returnsUnordered("product_name=ADJ Rosy Sunglasses; ZERO=0", |
| "product_name=ADJ Rosy Sunglasses; ZERO=0"); |
| } |
| |
  @Test public void testFilterDistinct() {
    final String sql = "select distinct \"state_province\", \"city\",\n"
        + " \"product_name\"\n"
        + "from \"foodmart\"\n"
        + "where \"product_name\" = 'High Top Dried Mushrooms'\n"
        + "and \"quarter\" in ('Q2', 'Q3')\n"
        + "and \"state_province\" = 'WA'";
    // The whole WHERE clause (selector / or / selector) and the three-column
    // DISTINCT are pushed into a single Druid "groupBy" query; the
    // aggregate-free groupBy carries a placeholder "dummy_agg" aggregator.
    final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart','granularity':'all',"
        + "'dimensions':[{'type':'default','dimension':'state_province'},"
        + "{'type':'default','dimension':'city'},"
        + "{'type':'default','dimension':'product_name'}],'limitSpec':{'type':'default'},"
        + "'filter':{'type':'and','fields':[{'type':'selector','dimension':'product_name',"
        + "'value':'High Top Dried Mushrooms'},{'type':'or','fields':[{'type':'selector',"
        + "'dimension':'quarter','value':'Q2'},{'type':'selector','dimension':'quarter',"
        + "'value':'Q3'}]},{'type':'selector','dimension':'state_province','value':'WA'}]},"
        + "'aggregations':[{'type':'longSum','name':'dummy_agg','fieldName':'dummy_agg'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    // Filter, projection and grouping all appear inside the DruidQuery node.
    final String explain = "PLAN=EnumerableInterpreter\n"
        + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]],"
        + " filter=[AND(=($3, 'High Top Dried Mushrooms'),"
        + " OR(=($87, 'Q2'),"
        + " =($87, 'Q3')),"
        + " =($30, 'WA'))],"
        + " projects=[[$30, $29, $3]], groups=[{0, 1, 2}], aggs=[[]])\n";
    sql(sql)
        .queryContains(druidChecker(druidQuery))
        .explainContains(explain)
        .returnsUnordered(
            "state_province=WA; city=Bremerton; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Everett; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Kirkland; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Lynnwood; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Olympia; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Port Orchard; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Puyallup; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Spokane; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Tacoma; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Yakima; product_name=High Top Dried Mushrooms");
  }
| |
  /** As {@link #testFilterDistinct()} but without DISTINCT, so the query is
   * pushed as a Druid "select" (raw rows) query and duplicate rows are
   * expected in the result. */
  @Test public void testFilter() {
    final String sql = "select \"state_province\", \"city\",\n"
        + " \"product_name\"\n"
        + "from \"foodmart\"\n"
        + "where \"product_name\" = 'High Top Dried Mushrooms'\n"
        + "and \"quarter\" in ('Q2', 'Q3')\n"
        + "and \"state_province\" = 'WA'";
    final String druidQuery = "{'queryType':'select',"
        + "'dataSource':'foodmart',"
        + "'descending':false,"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000'],"
        + "'filter':{'type':'and','fields':["
        + "{'type':'selector','dimension':'product_name','value':'High Top Dried Mushrooms'},"
        + "{'type':'or','fields':["
        + "{'type':'selector','dimension':'quarter','value':'Q2'},"
        + "{'type':'selector','dimension':'quarter','value':'Q3'}]},"
        + "{'type':'selector','dimension':'state_province','value':'WA'}]},"
        + "'dimensions':['state_province','city','product_name'],"
        + "'metrics':[],"
        + "'granularity':'all',"
        + "'pagingSpec':{'threshold':16384,'fromNext':true},'context':{'druid.query.fetch':false}}";
    // No groups=[...] in the plan: rows are fetched, not aggregated.
    final String explain = "PLAN=EnumerableInterpreter\n"
        + "  DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], "
        + "filter=[AND(=($3, 'High Top Dried Mushrooms'), "
        + "OR(=($87, 'Q2'), =($87, 'Q3')), =($30, 'WA'))], "
        + "projects=[[$30, $29, $3]])\n";
    sql(sql)
        .queryContains(druidChecker(druidQuery))
        .explainContains(explain)
        .returnsUnordered(
            // Duplicates (Puyallup, Spokane, Yakima) are intentional: the
            // query has no DISTINCT.
            "state_province=WA; city=Bremerton; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Everett; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Kirkland; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Lynnwood; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Olympia; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Port Orchard; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Puyallup; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Puyallup; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Spokane; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Spokane; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Spokane; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Tacoma; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Yakima; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Yakima; product_name=High Top Dried Mushrooms",
            "state_province=WA; city=Yakima; product_name=High Top Dried Mushrooms");
  }
| |
  /** Tests that conditions applied to time units extracted via the EXTRACT
   * function become ranges on the timestamp column.
   *
   * <p>Test case for
   * <a href="https://issues.apache.org/jira/browse/CALCITE-1334">[CALCITE-1334]
   * Convert predicates on EXTRACT function calls into date ranges</a>. */
  @Test public void testFilterTimestamp() {
    String sql = "select count(*) as c\n"
        + "from \"foodmart\"\n"
        + "where extract(year from \"timestamp\") = 1997\n"
        + "and extract(month from \"timestamp\") in (4, 6)\n";
    // The EXTRACT calls appear in the pushed filter as EXTRACT_DATE over the
    // reinterpreted timestamp column ($0).
    final String explain = "PLAN=EnumerableInterpreter\n"
        + "  DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], filter=[AND(="
        + "(EXTRACT_DATE(FLAG(YEAR), /INT(Reinterpret($0), 86400000)), 1997), OR(=(EXTRACT_DATE"
        + "(FLAG(MONTH), /INT(Reinterpret($0), 86400000)), 4), =(EXTRACT_DATE(FLAG(MONTH), /INT"
        + "(Reinterpret($0), 86400000)), 6)))], groups=[{}], aggs=[[COUNT()]])";
    sql(sql)
        .explainContains(explain)
        .returnsUnordered("C=13500");
  }
| |
| @Test public void testFilterSwapped() { |
| String sql = "select \"state_province\"\n" |
| + "from \"foodmart\"\n" |
| + "where 'High Top Dried Mushrooms' = \"product_name\""; |
| final String explain = "EnumerableInterpreter\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], filter=[=('High Top Dried Mushrooms', $3)], projects=[[$30]])"; |
| final String druidQuery = "'filter':{'type':'selector','dimension':'product_name'," |
| + "'value':'High Top Dried Mushrooms'}"; |
| sql(sql) |
| .explainContains(explain) |
| .queryContains(druidChecker(druidQuery)); |
| } |
| |
| /** Tests a query that exposed several bugs in the interpreter. */ |
| @Test public void testWhereGroupBy() { |
| String sql = "select \"wikiticker\".\"countryName\" as \"c0\",\n" |
| + " sum(\"wikiticker\".\"count\") as \"m1\",\n" |
| + " sum(\"wikiticker\".\"deleted\") as \"m2\",\n" |
| + " sum(\"wikiticker\".\"delta\") as \"m3\"\n" |
| + "from \"wiki\" as \"wikiticker\"\n" |
| + "where (\"wikiticker\".\"countryName\" in ('Colombia', 'France',\n" |
| + " 'Germany', 'India', 'Italy', 'Russia', 'United Kingdom',\n" |
| + " 'United States') or \"wikiticker\".\"countryName\" is null)\n" |
| + "group by \"wikiticker\".\"countryName\""; |
| sql(sql, WIKI) |
| .returnsCount(9); |
| } |
| |
| |
| /** Test case for |
| * <a href="https://issues.apache.org/jira/browse/CALCITE-1656">[CALCITE-1656] |
| * Improve cost function in DruidQuery to encourage early column |
| * pruning</a>. */ |
| @Test public void testFieldBasedCostColumnPruning() { |
| // A query where filter cannot be pushed to Druid but |
| // the project can still be pushed in order to prune extra columns. |
| String sql = "select \"countryName\", floor(\"time\" to DAY),\n" |
| + " cast(count(*) as integer) as c\n" |
| + "from \"wiki\"\n" |
| + "where floor(\"time\" to DAY) >= '1997-01-01 00:00:00'\n" |
| + "and floor(\"time\" to DAY) < '1997-09-01 00:00:00'\n" |
| + "group by \"countryName\", floor(\"time\" TO DAY)\n" |
| + "order by c limit 5"; |
| String plan = "BindableProject(countryName=[$0], EXPR$1=[$1], C=[CAST($2):INTEGER NOT NULL])\n" |
| + " BindableSort(sort0=[$2], dir0=[ASC], fetch=[5])\n" |
| + " BindableAggregate(group=[{0, 1}], agg#0=[COUNT()])\n" |
| + " BindableProject(countryName=[$1], EXPR$1=[FLOOR($0, FLAG(DAY))])\n" |
| + " BindableFilter(condition=[AND(>=(FLOOR($0, FLAG(DAY)), 1997-01-01 00:00:00), <(FLOOR($0, FLAG(DAY)), 1997-09-01 00:00:00))])\n" |
| + " DruidQuery(table=[[wiki, wiki]], intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[$0, $5]])"; |
| // NOTE: Druid query only has countryName as the dimension |
| // being queried after project is pushed to druid query. |
| String druidQuery = "{'queryType':'select'," |
| + "'dataSource':'wikiticker'," |
| + "'descending':false," |
| + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']," |
| + "'dimensions':['countryName']," |
| + "'metrics':[]," |
| + "'granularity':'all'," |
| + "'pagingSpec':{'threshold':16384,'fromNext':true}," |
| + "'context':{'druid.query.fetch':false}}"; |
| sql(sql, WIKI).explainContains(plan); |
| sql(sql, WIKI).queryContains(druidChecker(druidQuery)); |
| } |
| |
| @Test public void testGroupByMetricAndExtractTime() { |
| final String sql = "SELECT count(*), floor(\"timestamp\" to DAY), \"store_sales\" " |
| + "FROM \"foodmart\"\n" |
| + "GROUP BY \"store_sales\", floor(\"timestamp\" to DAY)\n ORDER BY \"store_sales\" DESC\n" |
| + "LIMIT 10\n"; |
| sql(sql).queryContains(druidChecker("{\"queryType\":\"select\"")); |
| } |
| |
| @Test public void testFilterOnDouble() { |
| String sql = "select \"product_id\" from \"foodmart\"\n" |
| + "where cast(\"product_id\" as double) < 0.41024 and \"product_id\" < 12223"; |
| sql(sql).queryContains( |
| druidChecker("'type':'bound','dimension':'product_id','upper':'0.41024'", |
| "'upper':'12223'")); |
| } |
| |
  /** Tests that grouping on the raw timestamp is pushed to Druid as an
   * "extraction" dimension on "__time" using a "timeFormat" extraction
   * function. */
  @Test public void testPushAggregateOnTime() {
    String sql = "select \"product_id\", \"timestamp\" as \"time\" from \"foodmart\" "
        + "where \"product_id\" = 1016 "
        + "and \"timestamp\" < cast('1997-01-03' as timestamp) "
        + "and \"timestamp\" > cast('1990-01-01' as timestamp) "
        + "group by \"timestamp\", \"product_id\" ";
    // Only a prefix of the generated JSON is checked, up to the start of the
    // date format pattern.
    String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all','dimensions':[{'type':'extraction',"
        + "'dimension':'__time','outputName':'extract',"
        + "'extractionFn':{'type':'timeFormat','format':'yyyy-MM-dd";
    sql(sql)
        .queryContains(druidChecker(druidQuery))
        .returnsUnordered("product_id=1016; time=1997-01-02 00:00:00");
  }
| |
  /** Tests that EXTRACT(YEAR ...) in SELECT/GROUP BY is pushed to Druid as a
   * "timeFormat" extraction dimension with format 'yyyy'. */
  @Test public void testPushAggregateOnTimeWithExtractYear() {
    String sql = "select EXTRACT( year from \"timestamp\") as \"year\",\"product_id\" from "
        + "\"foodmart\" where \"product_id\" = 1016 and "
        + "\"timestamp\" < cast('1999-01-02' as timestamp) and \"timestamp\" > cast"
        + "('1997-01-01' as timestamp)" + " group by "
        + " EXTRACT( year from \"timestamp\"), \"product_id\" ";
    sql(sql)
        .queryContains(
            druidChecker(
                ",'granularity':'all'",
                "{'type':'extraction',"
                    + "'dimension':'__time','outputName':'extract_year',"
                    + "'extractionFn':{'type':'timeFormat','format':'yyyy',"
                    + "'timeZone':'UTC','locale':'en-US'}}"))
        .returnsUnordered("year=1997; product_id=1016");
  }
| |
  /** Tests that EXTRACT(MONTH ...) in SELECT/GROUP BY is pushed to Druid as a
   * "timeFormat" extraction dimension with format 'M'. */
  @Test public void testPushAggregateOnTimeWithExtractMonth() {
    String sql = "select EXTRACT( month from \"timestamp\") as \"month\",\"product_id\" from "
        + "\"foodmart\" where \"product_id\" = 1016 and "
        + "\"timestamp\" < cast('1997-06-02' as timestamp) and \"timestamp\" > cast"
        + "('1997-01-01' as timestamp)" + " group by "
        + " EXTRACT( month from \"timestamp\"), \"product_id\" ";
    sql(sql)
        .queryContains(
            druidChecker(
                ",'granularity':'all'",
                "{'type':'extraction',"
                    + "'dimension':'__time','outputName':'extract_month',"
                    + "'extractionFn':{'type':'timeFormat','format':'M',"
                    + "'timeZone':'UTC','locale':'en-US'}}"))
        .returnsUnordered("month=1; product_id=1016", "month=2; product_id=1016",
            "month=3; product_id=1016", "month=4; product_id=1016", "month=5; product_id=1016");
  }
| |
  /** Tests that EXTRACT(DAY ...) in SELECT/GROUP BY is pushed to Druid as a
   * "timeFormat" extraction dimension with format 'd'. */
  @Test public void testPushAggregateOnTimeWithExtractDay() {
    String sql = "select EXTRACT( day from \"timestamp\") as \"day\","
        + "\"product_id\" from \"foodmart\""
        + " where \"product_id\" = 1016 and "
        + "\"timestamp\" < cast('1997-01-20' as timestamp) and \"timestamp\" > cast"
        + "('1997-01-01' as timestamp)" + " group by "
        + " EXTRACT( day from \"timestamp\"), \"product_id\" ";
    sql(sql)
        .queryContains(
            druidChecker(
                ",'granularity':'all'",
                "{'type':'extraction',"
                    + "'dimension':'__time','outputName':'extract_day',"
                    + "'extractionFn':{'type':'timeFormat','format':'d',"
                    + "'timeZone':'UTC','locale':'en-US'}}"))
        .returnsUnordered("day=2; product_id=1016", "day=10; product_id=1016",
            "day=13; product_id=1016", "day=16; product_id=1016");
  }
| |
  // Calcite rewrite the extract function in the query as:
  // rel#85:BindableProject.BINDABLE.[](input=rel#69:Subset#1.BINDABLE.[],
  // hourOfDay=/INT(MOD(Reinterpret($0), 86400000), 3600000),product_id=$1).
  // Currently 'EXTRACT( hour from \"timestamp\")' is not pushed to Druid.
  //
  // NOTE(review): the expected rows below say "month=..." even though the
  // query aliases the column as "hourOfDay"; they appear copied from the
  // month test and will need correcting before this test is un-ignored.
  @Ignore @Test public void testPushAggregateOnTimeWithExtractHourOfDay() {
    String sql =
        "select EXTRACT( hour from \"timestamp\") as \"hourOfDay\",\"product_id\" from "
            + "\"foodmart\" where \"product_id\" = 1016 and "
            + "\"timestamp\" < cast('1997-06-02' as timestamp) and \"timestamp\" > cast"
            + "('1997-01-01' as timestamp)" + " group by "
            + " EXTRACT( hour from \"timestamp\"), \"product_id\" ";
    sql(sql)
        .queryContains(
            druidChecker(
                ",'granularity':'all'",
                "{'type':'extraction',"
                    + "'dimension':'__time','outputName':'extract_0',"
                    + "'extractionFn':{'type':'timeFormat','format':'H',"
                    + "'timeZone':'UTC'}}"))
        .returnsUnordered("month=01; product_id=1016", "month=02; product_id=1016",
            "month=03; product_id=1016", "month=04; product_id=1016", "month=05; product_id=1016");
  }
| |
  /** Tests that DAY, MONTH and YEAR extractions in one query each become a
   * separate "timeFormat" extraction dimension in the Druid groupBy. */
  @Test public void testPushAggregateOnTimeWithExtractYearMonthDay() {
    String sql = "select EXTRACT( day from \"timestamp\") as \"day\", EXTRACT( month from "
        + "\"timestamp\") as \"month\", EXTRACT( year from \"timestamp\") as \"year\",\""
        + "product_id\" from \"foodmart\" where \"product_id\" = 1016 and "
        + "\"timestamp\" < cast('1997-01-20' as timestamp) and \"timestamp\" > cast"
        + "('1997-01-01' as timestamp)"
        + " group by "
        + " EXTRACT( day from \"timestamp\"), EXTRACT( month from \"timestamp\"),"
        + " EXTRACT( year from \"timestamp\"), \"product_id\" ";
    sql(sql)
        .queryContains(
            druidChecker(
                ",'granularity':'all'",
                "{'type':'extraction',"
                    + "'dimension':'__time','outputName':'extract_day',"
                    + "'extractionFn':{'type':'timeFormat','format':'d',"
                    + "'timeZone':'UTC','locale':'en-US'}}", "{'type':'extraction',"
                    + "'dimension':'__time','outputName':'extract_month',"
                    + "'extractionFn':{'type':'timeFormat','format':'M',"
                    + "'timeZone':'UTC','locale':'en-US'}}", "{'type':'extraction',"
                    + "'dimension':'__time','outputName':'extract_year',"
                    + "'extractionFn':{'type':'timeFormat','format':'yyyy',"
                    + "'timeZone':'UTC','locale':'en-US'}}"))
        // The timestamp range predicates are converted into the query's
        // intervals instead of remaining in the filter.
        .explainContains("PLAN=EnumerableInterpreter\n"
            + "  DruidQuery(table=[[foodmart, foodmart]], "
            + "intervals=[[1997-01-01T00:00:00.001/1997-01-20T00:00:00.000]], filter=[=($1, 1016)"
            + "], projects=[[EXTRACT_DATE(FLAG(DAY), /INT(Reinterpret($0), 86400000)), "
            + "EXTRACT_DATE(FLAG(MONTH), /INT(Reinterpret($0), 86400000)), EXTRACT_DATE(FLAG"
            + "(YEAR), /INT(Reinterpret($0), 86400000)), $1]], groups=[{0, 1, 2, 3}], aggs=[[]])\n")
        .returnsUnordered("day=2; month=1; year=1997; product_id=1016",
            "day=10; month=1; year=1997; product_id=1016",
            "day=13; month=1; year=1997; product_id=1016",
            "day=16; month=1; year=1997; product_id=1016");
  }
| |
  /** As {@link #testPushAggregateOnTimeWithExtractYearMonthDay()} but without
   * column aliases, so results use the default EXPR$N column names. */
  @Test public void testPushAggregateOnTimeWithExtractYearMonthDayWithOutRenaming() {
    String sql = "select EXTRACT( day from \"timestamp\"), EXTRACT( month from "
        + "\"timestamp\"), EXTRACT( year from \"timestamp\"),\""
        + "product_id\" from \"foodmart\" where \"product_id\" = 1016 and "
        + "\"timestamp\" < cast('1997-01-20' as timestamp) and \"timestamp\" > cast"
        + "('1997-01-01' as timestamp)"
        + " group by "
        + " EXTRACT( day from \"timestamp\"), EXTRACT( month from \"timestamp\"),"
        + " EXTRACT( year from \"timestamp\"), \"product_id\" ";
    sql(sql)
        .queryContains(
            druidChecker(
                ",'granularity':'all'", "{'type':'extraction',"
                    + "'dimension':'__time','outputName':'extract_day',"
                    + "'extractionFn':{'type':'timeFormat','format':'d',"
                    + "'timeZone':'UTC','locale':'en-US'}}", "{'type':'extraction',"
                    + "'dimension':'__time','outputName':'extract_month',"
                    + "'extractionFn':{'type':'timeFormat','format':'M',"
                    + "'timeZone':'UTC','locale':'en-US'}}", "{'type':'extraction',"
                    + "'dimension':'__time','outputName':'extract_year',"
                    + "'extractionFn':{'type':'timeFormat','format':'yyyy',"
                    + "'timeZone':'UTC','locale':'en-US'}}"))
        .explainContains("PLAN=EnumerableInterpreter\n"
            + "  DruidQuery(table=[[foodmart, foodmart]], "
            + "intervals=[[1997-01-01T00:00:00.001/1997-01-20T00:00:00.000]], filter=[=($1, 1016)"
            + "], projects=[[EXTRACT_DATE(FLAG(DAY), /INT(Reinterpret($0), 86400000)), "
            + "EXTRACT_DATE(FLAG(MONTH), /INT(Reinterpret($0), 86400000)), EXTRACT_DATE(FLAG"
            + "(YEAR), /INT(Reinterpret($0), 86400000)), $1]], groups=[{0, 1, 2, 3}], aggs=[[]])\n")
        .returnsUnordered("EXPR$0=2; EXPR$1=1; EXPR$2=1997; product_id=1016",
            "EXPR$0=10; EXPR$1=1; EXPR$2=1997; product_id=1016",
            "EXPR$0=13; EXPR$1=1; EXPR$2=1997; product_id=1016",
            "EXPR$0=16; EXPR$1=1; EXPR$2=1997; product_id=1016");
  }
| |
  /** Tests a GROUP BY that repeats the same EXTRACT(DAY ...) expression; the
   * duplicate collapses to a single pushed "extract_day" dimension. */
  @Test public void testPushAggregateOnTimeWithExtractWithOutRenaming() {
    String sql = "select EXTRACT( day from \"timestamp\"), "
        + "\"product_id\" as \"dayOfMonth\" from \"foodmart\" "
        + "where \"product_id\" = 1016 and \"timestamp\" < cast('1997-01-20' as timestamp) "
        + "and \"timestamp\" > cast('1997-01-01' as timestamp)"
        + " group by "
        // EXTRACT( day ...) appears twice on purpose; see javadoc above.
        + " EXTRACT( day from \"timestamp\"), EXTRACT( day from \"timestamp\"),"
        + " \"product_id\" ";
    sql(sql)
        .queryContains(
            druidChecker(
                ",'granularity':'all'", "{'type':'extraction',"
                    + "'dimension':'__time','outputName':'extract_day',"
                    + "'extractionFn':{'type':'timeFormat','format':'d',"
                    + "'timeZone':'UTC','locale':'en-US'}}"))
        .explainContains("PLAN=EnumerableInterpreter\n"
            + "  DruidQuery(table=[[foodmart, foodmart]], "
            + "intervals=[[1997-01-01T00:00:00.001/1997-01-20T00:00:00.000]], filter=[=($1, 1016)], "
            + "projects=[[EXTRACT_DATE(FLAG(DAY), /INT(Reinterpret($0), 86400000)), $1]], "
            + "groups=[{0, 1}], aggs=[[]])\n")
        .returnsUnordered("EXPR$0=2; dayOfMonth=1016", "EXPR$0=10; dayOfMonth=1016",
            "EXPR$0=13; dayOfMonth=1016", "EXPR$0=16; dayOfMonth=1016");
  }
| |
  /** Tests that a filter mixing numeric bounds on dimension columns with an
   * EXTRACT(YEAR ...) condition is pushed entirely into a single Druid
   * "timeseries" query. */
  @Test public void testPushComplexFilter() {
    String sql = "select sum(\"store_sales\") from \"foodmart\" "
        + "where EXTRACT( year from \"timestamp\") = 1997 and "
        + "\"cases_per_pallet\" >= 8 and \"cases_per_pallet\" <= 10 and "
        + "\"units_per_case\" < 15 ";
    // The year condition becomes a selector on '__time' with a timeFormat
    // extraction function; the numeric conditions become bound filters.
    String druidQuery = "{'queryType':'timeseries','dataSource':'foodmart',"
        + "'descending':false,'granularity':'all','filter':{'type':'and',"
        + "'fields':[{'type':'bound','dimension':'cases_per_pallet','lower':'8',"
        + "'lowerStrict':false,'ordering':'numeric'},{'type':'bound',"
        + "'dimension':'cases_per_pallet','upper':'10','upperStrict':false,"
        + "'ordering':'numeric'},{'type':'bound','dimension':'units_per_case',"
        + "'upper':'15','upperStrict':true,'ordering':'numeric'},"
        + "{'type':'selector','dimension':'__time','value':'1997',"
        + "'extractionFn':{'type':'timeFormat','format':'yyyy','timeZone':'UTC',"
        + "'locale':'en-US'}}]},'aggregations':[{'type':'doubleSum',"
        + "'name':'EXPR$0','fieldName':'store_sales'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000'],"
        + "'context':{'skipEmptyBuckets':true}}";
    sql(sql)
        .explainContains("PLAN=EnumerableInterpreter\n"
            + "  DruidQuery(table=[[foodmart, foodmart]], "
            + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], filter=[AND(>=(CAST"
            + "($11):BIGINT, 8), <=(CAST($11):BIGINT, 10), <(CAST($10):BIGINT, 15), =(EXTRACT_DATE"
            + "(FLAG(YEAR), /INT(Reinterpret($0), 86400000)), 1997))], groups=[{}], "
            + "aggs=[[SUM($90)]])")
        .queryContains(druidChecker(druidQuery))
        .returnsUnordered("EXPR$0=75364.09998679161");
  }
| |
  /** Tests that filters on EXTRACT(DAY ...) and EXTRACT(MONTH ...) are pushed
   * to Druid as bound/selector filters on "__time" that carry the matching
   * "timeFormat" extraction functions. */
  @Test public void testPushOfFilterExtractionOnDayAndMonth() {
    String sql = "SELECT \"product_id\" , EXTRACT(day from \"timestamp\"), EXTRACT(month from "
        + "\"timestamp\") from \"foodmart\" WHERE EXTRACT(day from \"timestamp\") >= 30 AND "
        + "EXTRACT(month from \"timestamp\") = 11 "
        + "AND \"product_id\" >= 1549 group by \"product_id\", EXTRACT(day from "
        + "\"timestamp\"), EXTRACT(month from \"timestamp\")";
    sql(sql)
        .queryContains(
            druidChecker("{'queryType':'groupBy','dataSource':'foodmart',"
                + "'granularity':'all','dimensions':[{'type':'default',"
                + "'dimension':'product_id'},{'type':'extraction','dimension':'__time',"
                + "'outputName':'extract_day','extractionFn':{'type':'timeFormat',"
                + "'format':'d','timeZone':'UTC','locale':'en-US'}},{'type':'extraction',"
                + "'dimension':'__time','outputName':'extract_month',"
                + "'extractionFn':{'type':'timeFormat','format':'M','timeZone':'UTC',"
                + "'locale':'en-US'}}],'limitSpec':{'type':'default'},"
                + "'filter':{'type':'and','fields':[{'type':'bound',"
                + "'dimension':'product_id','lower':'1549','lowerStrict':false,"
                + "'ordering':'numeric'},{'type':'bound','dimension':'__time',"
                + "'lower':'30','lowerStrict':false,'ordering':'numeric',"
                + "'extractionFn':{'type':'timeFormat','format':'d','timeZone':'UTC',"
                + "'locale':'en-US'}},{'type':'selector','dimension':'__time',"
                + "'value':'11','extractionFn':{'type':'timeFormat','format':'M',"
                + "'timeZone':'UTC','locale':'en-US'}}]},'aggregations':[{'type':'longSum',"
                + "'name':'dummy_agg','fieldName':'dummy_agg'}],"
                + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}"))
        .returnsUnordered("product_id=1549; EXPR$1=30; EXPR$2=11",
            "product_id=1553; EXPR$1=30; EXPR$2=11");
  }
| |
| @Test public void testPushOfFilterExtractionOnDayAndMonthAndYear() { |
| String sql = "SELECT \"product_id\" , EXTRACT(day from \"timestamp\"), EXTRACT(month from " |
| + "\"timestamp\") , EXTRACT(year from \"timestamp\") from \"foodmart\" " |
| + "WHERE EXTRACT(day from \"timestamp\") >= 30 AND EXTRACT(month from \"timestamp\") = 11 " |
| + "AND \"product_id\" >= 1549 AND EXTRACT(year from \"timestamp\") = 1997" |
| + "group by \"product_id\", EXTRACT(day from \"timestamp\"), " |
| + "EXTRACT(month from \"timestamp\"), EXTRACT(year from \"timestamp\")"; |
| sql(sql) |
| .queryContains( |
| druidChecker("{'queryType':'groupBy','dataSource':'foodmart'," |
| + "'granularity':'all','dimensions':[{'type':'default'," |
| + "'dimension':'product_id'},{'type':'extraction','dimension':'__time'," |
| + "'outputName':'extract_day','extractionFn':{'type':'timeFormat'," |
| + "'format':'d','timeZone':'UTC','locale':'en-US'}},{'type':'extraction'," |
| + "'dimension':'__time','outputName':'extract_month'," |
| + "'extractionFn':{'type':'timeFormat','format':'M','timeZone':'UTC'," |
| + "'locale':'en-US'}},{'type':'extraction','dimension':'__time'," |
| + "'outputName':'extract_year','extractionFn':{'type':'timeFormat'," |
| + "'format':'yyyy','timeZone':'UTC','locale':'en-US'}}]," |
| + "'limitSpec':{'type':'default'},'filter':{'type':'and'," |
| + "'fields':[{'type':'bound','dimension':'product_id','lower':'1549'," |
| + "'lowerStrict':false,'ordering':'numeric'},{'type':'bound'," |
| + "'dimension':'__time','lower':'30','lowerStrict':false," |
| + "'ordering':'numeric','extractionFn':{'type':'timeFormat','format':'d'," |
| + "'timeZone':'UTC','locale':'en-US'}},{'type':'selector'," |
| + "'dimension':'__time','value':'11','extractionFn':{'type':'timeFormat'," |
| + "'format':'M','timeZone':'UTC','locale':'en-US'}},{'type':'selector'," |
| + "'dimension':'__time','value':'1997','extractionFn':{'type':'timeFormat'," |
| + "'format':'yyyy','timeZone':'UTC','locale':'en-US'}}]}," |
| + "'aggregations':[{'type':'longSum','name':'dummy_agg'," |
| + "'fieldName':'dummy_agg'}]," |
| + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}")) |
| .returnsUnordered("product_id=1549; EXPR$1=30; EXPR$2=11; EXPR$3=1997", |
| "product_id=1553; EXPR$1=30; EXPR$2=11; EXPR$3=1997"); |
| } |
| |
  /** Tests that BETWEEN on EXTRACT(MONTH ...) is pushed to Druid as a pair of
   * bound filters (lower and upper) on "__time" with a month-format
   * extraction function. */
  @Test public void testFilterExtractionOnMonthWithBetween() {
    String sqlQuery = "SELECT \"product_id\", EXTRACT(month from \"timestamp\") FROM \"foodmart\""
        + " WHERE EXTRACT(month from \"timestamp\") BETWEEN 10 AND 11 AND \"product_id\" >= 1558"
        + " GROUP BY \"product_id\", EXTRACT(month from \"timestamp\")";
    String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all','dimensions':[{'type':'default',"
        + "'dimension':'product_id'},{'type':'extraction','dimension':'__time',"
        + "'outputName':'extract_month','extractionFn':{'type':'timeFormat',"
        + "'format':'M','timeZone':'UTC','locale':'en-US'}}],"
        + "'limitSpec':{'type':'default'},'filter':{'type':'and',"
        + "'fields':[{'type':'bound','dimension':'product_id','lower':'1558',"
        + "'lowerStrict':false,'ordering':'numeric'},{'type':'bound',"
        + "'dimension':'__time','lower':'10','lowerStrict':false,"
        + "'ordering':'numeric','extractionFn':{'type':'timeFormat','format':'M',"
        + "'timeZone':'UTC','locale':'en-US'}},{'type':'bound',"
        + "'dimension':'__time','upper':'11','upperStrict':false,"
        + "'ordering':'numeric','extractionFn':{'type':'timeFormat','format':'M',"
        + "'timeZone':'UTC','locale':'en-US'}}]},'aggregations':[{'type':'longSum',"
        + "'name':'dummy_agg','fieldName':'dummy_agg'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    sql(sqlQuery)
        .returnsUnordered("product_id=1558; EXPR$1=10", "product_id=1558; EXPR$1=11",
            "product_id=1559; EXPR$1=11")
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** Tests that IN on EXTRACT(MONTH ...) is pushed to Druid as an OR of
   * selector filters on "__time" with a month-format extraction function. */
  @Test public void testFilterExtractionOnMonthWithIn() {
    String sqlQuery = "SELECT \"product_id\", EXTRACT(month from \"timestamp\") FROM \"foodmart\""
        + " WHERE EXTRACT(month from \"timestamp\") IN (10, 11) AND \"product_id\" >= 1558"
        + " GROUP BY \"product_id\", EXTRACT(month from \"timestamp\")";
    sql(sqlQuery)
        .queryContains(
            druidChecker("{'queryType':'groupBy',"
                + "'dataSource':'foodmart','granularity':'all',"
                + "'dimensions':[{'type':'default','dimension':'product_id'},"
                + "{'type':'extraction','dimension':'__time','outputName':'extract_month',"
                + "'extractionFn':{'type':'timeFormat','format':'M','timeZone':'UTC',"
                + "'locale':'en-US'}}],'limitSpec':{'type':'default'},"
                + "'filter':{'type':'and','fields':[{'type':'bound',"
                + "'dimension':'product_id','lower':'1558','lowerStrict':false,"
                + "'ordering':'numeric'},{'type':'or','fields':[{'type':'selector',"
                + "'dimension':'__time','value':'10','extractionFn':{'type':'timeFormat',"
                + "'format':'M','timeZone':'UTC','locale':'en-US'}},{'type':'selector',"
                + "'dimension':'__time','value':'11','extractionFn':{'type':'timeFormat',"
                + "'format':'M','timeZone':'UTC','locale':'en-US'}}]}]},"
                + "'aggregations':[{'type':'longSum','name':'dummy_agg',"
                + "'fieldName':'dummy_agg'}],"
                + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}"))
        .returnsUnordered("product_id=1558; EXPR$1=10", "product_id=1558; EXPR$1=11",
            "product_id=1559; EXPR$1=11");
  }
| |
  /** Tests that ORDER BY over an extracted month, an aggregated metric and a
   * dimension is pushed into the Druid groupBy "limitSpec" columns. */
  @Test public void testPushofOrderByWithMonthExtract() {
    String sqlQuery = "SELECT extract(month from \"timestamp\") as m , \"product_id\", SUM"
        + "(\"unit_sales\") as s FROM \"foodmart\""
        + " WHERE \"product_id\" >= 1558"
        + " GROUP BY extract(month from \"timestamp\"), \"product_id\" order by m, s, "
        + "\"product_id\"";
    sql(sqlQuery).queryContains(
        druidChecker("{'queryType':'groupBy','dataSource':'foodmart',"
            + "'granularity':'all','dimensions':[{'type':'extraction',"
            + "'dimension':'__time','outputName':'extract_month',"
            + "'extractionFn':{'type':'timeFormat','format':'M','timeZone':'UTC',"
            + "'locale':'en-US'}},{'type':'default','dimension':'product_id'}],"
            + "'limitSpec':{'type':'default','columns':[{'dimension':'extract_month',"
            + "'direction':'ascending','dimensionOrder':'numeric'},{'dimension':'S',"
            + "'direction':'ascending','dimensionOrder':'numeric'},"
            + "{'dimension':'product_id','direction':'ascending',"
            + "'dimensionOrder':'alphanumeric'}]},'filter':{'type':'bound',"
            + "'dimension':'product_id','lower':'1558','lowerStrict':false,"
            + "'ordering':'numeric'},'aggregations':[{'type':'longSum','name':'S',"
            + "'fieldName':'unit_sales'}],"
            + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}"))
        .explainContains("PLAN=EnumerableInterpreter\n"
            + "  DruidQuery(table=[[foodmart, foodmart]], "
            + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], filter=[>=(CAST($1)"
            + ":BIGINT, 1558)], projects=[[EXTRACT_DATE(FLAG(MONTH), /INT(Reinterpret($0), "
            + "86400000)), $1, $89]], groups=[{0, 1}], aggs=[[SUM($2)]], sort0=[0], sort1=[2], "
            + "sort2=[1], dir0=[ASC], dir1=[ASC], dir2=[ASC])");
  }
| |
| |
| @Test public void testGroupByFloorTimeWithoutLimit() { |
| final String sql = "select floor(\"timestamp\" to MONTH) as \"month\"\n" |
| + "from \"foodmart\"\n" |
| + "group by floor(\"timestamp\" to MONTH)\n" |
| + "order by \"month\" DESC"; |
| sql(sql) |
| .explainContains("PLAN=EnumerableInterpreter\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], " |
| + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[FLOOR($0, " |
| + "FLAG(MONTH))]], groups=[{0}], aggs=[[]], sort0=[0], dir0=[DESC])") |
| .queryContains(druidChecker("'queryType':'timeseries'", "'descending':true")); |
| } |
| |
  /** As {@link #testGroupByFloorTimeWithoutLimit()} but with LIMIT; per the
   * expected plan the limit stays in an EnumerableLimit above the Druid
   * timeseries query. */
  @Test public void testGroupByFloorTimeWithLimit() {
    final String sql = "select floor(\"timestamp\" to MONTH) as \"floor_month\"\n"
        + "from \"foodmart\"\n"
        + "group by floor(\"timestamp\" to MONTH)\n"
        + "order by \"floor_month\" DESC LIMIT 3";
    sql(sql).explainContains("PLAN=EnumerableLimit(fetch=[3])\n"
        + "  EnumerableInterpreter\n"
        + "    DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[FLOOR($0, "
        + "FLAG(MONTH))]], groups=[{0}], aggs=[[]], sort0=[0], dir0=[DESC])")
        .queryContains(druidChecker("'queryType':'timeseries'", "'descending':true"))
        .returnsOrdered("floor_month=1997-12-01 00:00:00", "floor_month=1997-11-01 00:00:00",
            "floor_month=1997-10-01 00:00:00");
  }
| |
  /** Tests pushing ORDER BY ... LIMIT over year/month extractions, a
   * dimension and a metric into the Druid groupBy "limitSpec", including
   * mixed ascending/descending sort directions and the limit itself. */
  @Test public void testPushofOrderByYearWithYearMonthExtract() {
    String sqlQuery = "SELECT year(\"timestamp\") as y, extract(month from \"timestamp\") as m , "
        + "\"product_id\", SUM"
        + "(\"unit_sales\") as s FROM \"foodmart\""
        + " WHERE \"product_id\" >= 1558"
        + " GROUP BY year(\"timestamp\"), extract(month from \"timestamp\"), \"product_id\" order"
        + " by y DESC, m ASC, s DESC, \"product_id\" LIMIT 3";
    final String expectedPlan = "PLAN=EnumerableInterpreter\n"
        + "  DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], filter=[>=(CAST($1)"
        + ":BIGINT, 1558)], projects=[[EXTRACT_DATE(FLAG(YEAR), /INT(Reinterpret($0), 86400000)),"
        + " EXTRACT_DATE(FLAG(MONTH), /INT(Reinterpret($0), 86400000)), $1, $89]], groups=[{0, 1,"
        + " 2}], aggs=[[SUM($3)]], sort0=[0], sort1=[1], sort2=[3], sort3=[2], dir0=[DESC], "
        + "dir1=[ASC], dir2=[DESC], dir3=[ASC], fetch=[3])";
    final String expectedDruidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all','dimensions':[{'type':'extraction',"
        + "'dimension':'__time','outputName':'extract_year',"
        + "'extractionFn':{'type':'timeFormat','format':'yyyy','timeZone':'UTC',"
        + "'locale':'en-US'}},{'type':'extraction','dimension':'__time',"
        + "'outputName':'extract_month','extractionFn':{'type':'timeFormat',"
        + "'format':'M','timeZone':'UTC','locale':'en-US'}},{'type':'default',"
        + "'dimension':'product_id'}],'limitSpec':{'type':'default','limit':3,"
        + "'columns':[{'dimension':'extract_year','direction':'descending',"
        + "'dimensionOrder':'numeric'},{'dimension':'extract_month',"
        + "'direction':'ascending','dimensionOrder':'numeric'},{'dimension':'S',"
        + "'direction':'descending','dimensionOrder':'numeric'},"
        + "{'dimension':'product_id','direction':'ascending',"
        + "'dimensionOrder':'alphanumeric'}]},'filter':{'type':'bound',"
        + "'dimension':'product_id','lower':'1558','lowerStrict':false,"
        + "'ordering':'numeric'},'aggregations':[{'type':'longSum','name':'S',"
        + "'fieldName':'unit_sales'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    sql(sqlQuery).explainContains(expectedPlan).queryContains(druidChecker(expectedDruidQuery))
        .returnsOrdered("Y=1997; M=1; product_id=1558; S=6", "Y=1997; M=1; product_id=1559; S=6",
            "Y=1997; M=2; product_id=1558; S=24");
  }
| |
  /** Like {@code testPushofOrderByYearWithYearMonthExtract}, but the leading
   * ORDER BY key is the aggregated metric {@code s}: the sort is still pushed
   * into the Druid groupBy limitSpec, with the metric column {@code S} using
   * numeric dimensionOrder. */
  @Test public void testPushofOrderByMetricWithYearMonthExtract() {
    String sqlQuery = "SELECT year(\"timestamp\") as y, extract(month from \"timestamp\") as m , "
        + "\"product_id\", SUM(\"unit_sales\") as s FROM \"foodmart\""
        + " WHERE \"product_id\" >= 1558"
        + " GROUP BY year(\"timestamp\"), extract(month from \"timestamp\"), \"product_id\" order"
        + " by s DESC, m DESC, \"product_id\" LIMIT 3";
    // sort0=[3] refers to the SUM column, proving the metric sort was pushed.
    final String expectedPlan = "PLAN=EnumerableInterpreter\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], filter=[>=(CAST($1)"
        + ":BIGINT, 1558)], projects=[[EXTRACT_DATE(FLAG(YEAR), /INT(Reinterpret($0), 86400000)),"
        + " EXTRACT_DATE(FLAG(MONTH), /INT(Reinterpret($0), 86400000)), $1, $89]], groups=[{0, 1,"
        + " 2}], aggs=[[SUM($3)]], sort0=[3], sort1=[1], sort2=[2], dir0=[DESC], dir1=[DESC], "
        + "dir2=[ASC], fetch=[3])";
    final String expectedDruidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all','dimensions':[{'type':'extraction',"
        + "'dimension':'__time','outputName':'extract_year',"
        + "'extractionFn':{'type':'timeFormat','format':'yyyy','timeZone':'UTC',"
        + "'locale':'en-US'}},{'type':'extraction','dimension':'__time',"
        + "'outputName':'extract_month','extractionFn':{'type':'timeFormat',"
        + "'format':'M','timeZone':'UTC','locale':'en-US'}},{'type':'default',"
        + "'dimension':'product_id'}],'limitSpec':{'type':'default','limit':3,"
        + "'columns':[{'dimension':'S','direction':'descending',"
        + "'dimensionOrder':'numeric'},{'dimension':'extract_month',"
        + "'direction':'descending','dimensionOrder':'numeric'},"
        + "{'dimension':'product_id','direction':'ascending',"
        + "'dimensionOrder':'alphanumeric'}]},'filter':{'type':'bound',"
        + "'dimension':'product_id','lower':'1558','lowerStrict':false,"
        + "'ordering':'numeric'},'aggregations':[{'type':'longSum','name':'S',"
        + "'fieldName':'unit_sales'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    sql(sqlQuery).explainContains(expectedPlan).queryContains(druidChecker(expectedDruidQuery))
        .returnsOrdered("Y=1997; M=12; product_id=1558; S=30", "Y=1997; M=3; product_id=1558; S=29",
            "Y=1997; M=5; product_id=1558; S=27");
  }
| |
  /** Sorting on a metric over a FLOOR-to-month group-by: the aggregation is
   * pushed to Druid as a "timeseries" query, but the metric sort is not — it
   * remains as a BindableSort (plus a BindableProject reordering the columns)
   * above the DruidQuery. */
  @Test public void testGroupByTimeSortOverMetrics() {
    final String sqlQuery = "SELECT count(*) as c , SUM(\"unit_sales\") as s, floor(\"timestamp\""
        + " to month) FROM \"foodmart\" group by floor(\"timestamp\" to month) order by s DESC";
    sql(sqlQuery)
        .explainContains("PLAN=EnumerableInterpreter\n"
            + " BindableSort(sort0=[$1], dir0=[DESC])\n"
            + " BindableProject(C=[$1], S=[$2], EXPR$2=[$0])\n"
            + " DruidQuery(table=[[foodmart, foodmart]], "
            + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[FLOOR($0, "
            + "FLAG(MONTH)), $89]], groups=[{0}], aggs=[[COUNT(), SUM($1)]])")
        .queryContains(druidChecker("'queryType':'timeseries'"))
        .returnsOrdered("C=8716; S=26796; EXPR$2=1997-12-01 00:00:00",
            "C=8231; S=25270; EXPR$2=1997-11-01 00:00:00",
            "C=7752; S=23763; EXPR$2=1997-07-01 00:00:00",
            "C=7710; S=23706; EXPR$2=1997-03-01 00:00:00",
            "C=7038; S=21697; EXPR$2=1997-08-01 00:00:00",
            "C=7033; S=21628; EXPR$2=1997-01-01 00:00:00",
            "C=6912; S=21350; EXPR$2=1997-06-01 00:00:00",
            "C=6865; S=21081; EXPR$2=1997-05-01 00:00:00",
            "C=6844; S=20957; EXPR$2=1997-02-01 00:00:00",
            "C=6662; S=20388; EXPR$2=1997-09-01 00:00:00",
            "C=6588; S=20179; EXPR$2=1997-04-01 00:00:00",
            "C=6478; S=19958; EXPR$2=1997-10-01 00:00:00");
  }
| |
  /** When ordering by the raw timestamp dimension, the pushed limitSpec uses
   * alphanumeric dimensionOrder for the 'extract' time column, while the
   * metric columns C and S use numeric ordering. */
  @Test public void testNumericOrderingOfOrderByOperatorFullTime() {
    final String sqlQuery = "SELECT \"timestamp\", count(*) as c, SUM(\"unit_sales\") "
        + "as s FROM "
        + "\"foodmart\" group by \"timestamp\" order by \"timestamp\" DESC, c DESC, s LIMIT 5";
    final String druidSubQuery = "'limitSpec':{'type':'default','limit':5,"
        + "'columns':[{'dimension':'extract','direction':'descending',"
        + "'dimensionOrder':'alphanumeric'},{'dimension':'C',"
        + "'direction':'descending','dimensionOrder':'numeric'},{'dimension':'S',"
        + "'direction':'ascending','dimensionOrder':'numeric'}]},"
        + "'aggregations':[{'type':'count','name':'C'},{'type':'longSum',"
        + "'name':'S','fieldName':'unit_sales'}]";
    sql(sqlQuery).returnsOrdered("timestamp=1997-12-30 00:00:00; C=22; S=36\ntimestamp=1997-12-29"
        + " 00:00:00; C=321; S=982\ntimestamp=1997-12-28 00:00:00; C=480; "
        + "S=1496\ntimestamp=1997-12-27 00:00:00; C=363; S=1156\ntimestamp=1997-12-26 00:00:00; "
        + "C=144; S=420").queryContains(druidChecker(druidSubQuery));

  }
| |
  /** When ordering by day/month/year EXTRACT columns, each extracted time
   * column in the pushed limitSpec uses numeric dimensionOrder. */
  @Test public void testNumericOrderingOfOrderByOperatorTimeExtract() {
    final String sqlQuery = "SELECT extract(day from \"timestamp\") as d, extract(month from "
        + "\"timestamp\") as m, year(\"timestamp\") as y , count(*) as c, SUM(\"unit_sales\") "
        + "as s FROM "
        + "\"foodmart\" group by extract(day from \"timestamp\"), extract(month from \"timestamp\"), "
        + "year(\"timestamp\") order by d DESC, m ASC, y DESC LIMIT 5";
    final String druidSubQuery = "'limitSpec':{'type':'default','limit':5,"
        + "'columns':[{'dimension':'extract_day','direction':'descending',"
        + "'dimensionOrder':'numeric'},{'dimension':'extract_month',"
        + "'direction':'ascending','dimensionOrder':'numeric'},"
        + "{'dimension':'extract_year','direction':'descending',"
        + "'dimensionOrder':'numeric'}]}";
    sql(sqlQuery).returnsOrdered("D=30; M=3; Y=1997; C=114; S=351\nD=30; M=5; Y=1997; "
        + "C=24; S=34\nD=30; M=6; Y=1997; C=73; S=183\nD=30; M=7; Y=1997; C=29; S=54\nD=30; M=8; "
        + "Y=1997; C=137; S=422").queryContains(druidChecker(druidSubQuery));

  }
| |
  /** When ordering by a string dimension, the pushed limitSpec uses
   * alphanumeric dimensionOrder for that dimension. */
  @Test public void testNumericOrderingOfOrderByOperatorStringDims() {
    final String sqlQuery = "SELECT \"brand_name\", count(*) as c, SUM(\"unit_sales\") "
        + "as s FROM "
        + "\"foodmart\" group by \"brand_name\" order by \"brand_name\" DESC LIMIT 5";
    final String druidSubQuery = "'limitSpec':{'type':'default','limit':5,"
        + "'columns':[{'dimension':'brand_name','direction':'descending',"
        + "'dimensionOrder':'alphanumeric'}]}";
    sql(sqlQuery).returnsOrdered("brand_name=Washington; C=576; S=1775\nbrand_name=Walrus; C=457;"
        + " S=1399\nbrand_name=Urban; C=299; S=924\nbrand_name=Tri-State; C=2339; "
        + "S=7270\nbrand_name=Toucan; C=123; S=380").queryContains(druidChecker(druidSubQuery));

  }
| |
  /** EXTRACT(WEEK ...) is pushed as a timeFormat extraction function with
   * format 'w', both as the groupBy dimension and inside the IN filter (which
   * is rewritten as an OR of selector filters on __time). */
  @Test public void testGroupByWeekExtract() {
    final String sql = "SELECT extract(week from \"timestamp\") from \"foodmart\" where "
        + "\"product_id\" = 1558 and extract(week from \"timestamp\") IN (10, 11)group by extract"
        + "(week from \"timestamp\")";

    final String druidQuery = "{'queryType':'groupBy','dataSource':'foodmart',"
        + "'granularity':'all','dimensions':[{'type':'extraction',"
        + "'dimension':'__time','outputName':'extract_week',"
        + "'extractionFn':{'type':'timeFormat','format':'w','timeZone':'UTC',"
        + "'locale':'en-US'}}],'limitSpec':{'type':'default'},"
        + "'filter':{'type':'and','fields':[{'type':'selector',"
        + "'dimension':'product_id','value':'1558'},{'type':'or',"
        + "'fields':[{'type':'selector','dimension':'__time','value':'10',"
        + "'extractionFn':{'type':'timeFormat','format':'w','timeZone':'UTC',"
        + "'locale':'en-US'}},{'type':'selector','dimension':'__time',"
        + "'value':'11','extractionFn':{'type':'timeFormat','format':'w',"
        + "'timeZone':'UTC','locale':'en-US'}}]}]},"
        + "'aggregations':[{'type':'longSum','name':'dummy_agg',"
        + "'fieldName':'dummy_agg'}],"
        + "'intervals':['1900-01-09T00:00:00.000/2992-01-10T00:00:00.000']}";
    sql(sql).returnsOrdered("EXPR$0=10\nEXPR$0=11").queryContains(druidChecker(druidQuery));
  }
| |
  /** Test case for
   * <a href="https://issues.apache.org/jira/browse/CALCITE-1765">[CALCITE-1765]
   * Druid adapter: Gracefully handle granularity that cannot be pushed to
   * extraction function</a>. */
  @Test public void testTimeExtractThatCannotBePushed() {
    final String sql = "SELECT extract(CENTURY from \"timestamp\") from \"foodmart\" where "
        + "\"product_id\" = 1558 group by extract(CENTURY from \"timestamp\")";
    // CENTURY has no Druid extraction-function mapping: only the filter and
    // the timestamp projection are pushed (as a 'select' query), while the
    // EXTRACT and the GROUP BY run in Bindable operators on top.
    final String plan = "PLAN=EnumerableInterpreter\n"
        + " BindableAggregate(group=[{0}])\n"
        + " BindableProject(EXPR$0=[EXTRACT_DATE(FLAG(CENTURY), /INT(Reinterpret($0), 86400000))])\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], filter=[=($1, 1558)], "
        + "projects=[[$0]])";
    sql(sql).explainContains(plan).queryContains(druidChecker("'queryType':'select'"))
        .returnsUnordered("EXPR$0=20");
  }
| |
  /** Test case for
   * <a href="https://issues.apache.org/jira/browse/CALCITE-1770">[CALCITE-1770]
   * Druid adapter: CAST(NULL AS ...) gives NPE</a>. */
  @Test public void testPushCast() {
    final String sql = "SELECT \"product_id\"\n"
        + "from \"foodmart\"\n"
        + "where \"product_id\" = cast(NULL as varchar)\n"
        + "group by \"product_id\"";
    // The comparison with the null cast is not pushed — it stays as a
    // BindableFilter above the DruidQuery. The point of the test is simply
    // that planning completes without a NullPointerException.
    final String plan = "PLAN=EnumerableInterpreter\n"
        + " BindableAggregate(group=[{0}])\n"
        + " BindableFilter(condition=[=($0, null)])\n"
        + " DruidQuery(table=[[foodmart, foodmart]], "
        + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], projects=[[$1]])";
    sql(sql).explainContains(plan);
  }
| |
| @Test public void testFalseFilter() { |
| String sql = "Select count(*) as c from \"foodmart\" where false"; |
| sql(sql).returnsUnordered("C=0"); |
| } |
| |
| @Test public void testFalseFilterCaseConjectionWithTrue() { |
| String sql = "Select count(*) as c from \"foodmart\" where " |
| + "\"product_id\" = 1558 and (true or false)"; |
| sql(sql).returnsUnordered("C=60").queryContains(druidChecker("'queryType':'timeseries'")); |
| } |
| |
  /** Test case for
   * <a href="https://issues.apache.org/jira/browse/CALCITE-1769">[CALCITE-1769]
   * Druid adapter: Push down filters involving numeric cast of literals</a>. */
  @Test public void testPushCastNumeric() {
    // The casts should collapse into a Druid "bound" filter with numeric
    // ordering, rather than being evaluated outside Druid.
    String druidQuery = "'filter':{'type':'bound','dimension':'product_id',"
        + "'upper':'10','upperStrict':true,'ordering':'numeric'}";
    sql("?")
        .withRel(new Function<RelBuilder, RelNode>() {
          public RelNode apply(RelBuilder b) {
            // Builds the equivalent of:
            //   select "product_id"
            //   from "foodmart"."foodmart"
            //   where cast("product_id" as integer) < cast('10' as integer)
            final RelDataType intType =
                b.getTypeFactory().createSqlType(SqlTypeName.INTEGER);
            return b.scan("foodmart", "foodmart")
                .filter(
                    b.call(SqlStdOperatorTable.LESS_THAN,
                        b.getRexBuilder().makeCall(intType,
                            SqlStdOperatorTable.CAST,
                            ImmutableList.<RexNode>of(b.field("product_id"))),
                        b.getRexBuilder().makeCall(intType,
                            SqlStdOperatorTable.CAST,
                            ImmutableList.of(b.literal("10")))))
                .project(b.field("product_id"))
                .build();
          }
        })
        .queryContains(druidChecker(druidQuery));
  }
| |
  /** Builds, via RelBuilder, a count(*) over a field-equals-literal filter and
   * checks that it is pushed to Druid as a "timeseries" query. */
  @Test public void testPushFieldEqualsLiteral() {
    sql("?")
        .withRel(new Function<RelBuilder, RelNode>() {
          public RelNode apply(RelBuilder b) {
            // select count(*) as c
            // from foodmart.foodmart
            // where product_id = 'id'
            return b.scan("foodmart", "foodmart")
                .filter(
                    b.call(SqlStdOperatorTable.EQUALS, b.field("product_id"),
                        b.literal("id")))
                .aggregate(b.groupKey(), b.countStar("c"))
                .build();
          }
        })
        // Should return one row, "c=0"; logged
        // [CALCITE-1775] "GROUP BY ()" on empty relation should return 1 row
        .returnsUnordered()
        .queryContains(druidChecker("'queryType':'timeseries'"));
  }
| |
| /** |
| * <a href="https://issues.apache.org/jira/browse/CALCITE-1805">[CALCITE-1805] |
| * Druid adapter cannot handle count column without adding support for nested queries</a>. |
| */ |
| @Test public void testCountColumn() { |
| final String sql = "SELECT count(\"countryName\") FROM (SELECT \"countryName\" FROM " |
| + "\"wikiticker\" WHERE \"countryName\" IS NOT NULL) as a"; |
| sql(sql, WIKI_AUTO2) |
| .returnsUnordered("EXPR$0=3799"); |
| |
| final String sql2 = "SELECT count(\"countryName\") FROM (SELECT \"countryName\" FROM " |
| + "\"wikiticker\") as a"; |
| final String plan2 = "PLAN=EnumerableInterpreter\n" |
| + " BindableAggregate(group=[{}], EXPR$0=[COUNT($0)])\n" |
| + " DruidQuery(table=[[wiki, wikiticker]], " |
| + "intervals=[[1900-01-01T00:00:00.000/3000-01-01T00:00:00.000]], projects=[[$7]])"; |
| sql(sql2, WIKI_AUTO2) |
| .returnsUnordered("EXPR$0=3799") |
| .explainContains(plan2); |
| |
| final String sql3 = "SELECT count(*), count(\"countryName\") FROM \"wikiticker\""; |
| final String plan3 = "PLAN=EnumerableInterpreter\n" |
| + " BindableAggregate(group=[{}], EXPR$0=[COUNT()], EXPR$1=[COUNT($0)])\n" |
| + " DruidQuery(table=[[wiki, wikiticker]], " |
| + "intervals=[[1900-01-01T00:00:00.000/3000-01-01T00:00:00.000]], projects=[[$7]])"; |
| sql(sql3, WIKI_AUTO2) |
| .explainContains(plan3); |
| } |
| |
| /** |
| * Test to make sure the "not" filter has only 1 field, rather than an array of fields. |
| */ |
| @Test public void testNotFilterForm() { |
| String sql = "select count(distinct \"the_month\") from " |
| + "\"foodmart\" where \"the_month\" <> \'October\'"; |
| String druidFilter = "'filter':{'type':'not'," |
| + "'field':{'type':'selector','dimension':'the_month','value':'October'}}"; |
| // Check that the filter actually worked, and that druid was responsible for the filter |
| sql(sql, FOODMART) |
| .queryContains(druidChecker(druidFilter)) |
| .returnsOrdered("EXPR$0=11"); |
| } |
| |
| /** |
| * Test to ensure that count(distinct ...) gets pushed to Druid when approximate results are |
| * acceptable |
| * */ |
| @Test public void testDistinctCountWhenApproxResultsAccepted() { |
| String sql = "select count(distinct \"customer_id\") from \"foodmart\""; |
| String expectedSubExplain = "DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00" |
| + ":00:00.000/2992-01-10T00:00:00.000]], groups=[{}], aggs=[[COUNT(DISTINCT $20)]])"; |
| String expectedAggregate = "{'type':'cardinality','name':" |
| + "'EXPR$0','fieldNames':['customer_id']}"; |
| |
| testCountWithApproxDistinct(true, sql, expectedSubExplain, expectedAggregate); |
| } |
| |
| /** |
| * Test to ensure that count(distinct ...) doesn't get pushed to Druid when approximate results |
| * are not acceptable |
| */ |
| @Test public void testDistinctCountWhenApproxResultsNotAccepted() { |
| String sql = "select count(distinct \"customer_id\") from \"foodmart\""; |
| String expectedSubExplain = " BindableAggregate(group=[{}], EXPR$0=[COUNT($0)])\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], " |
| + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], " |
| + "groups=[{20}], aggs=[[]])"; |
| |
| testCountWithApproxDistinct(false, sql, expectedSubExplain); |
| } |
| |
| /** |
| * Test to ensure that a count distinct on metric does not get pushed into Druid |
| */ |
| @Test public void testDistinctCountOnMetric() { |
| String sql = "select count(distinct \"store_sales\") from \"foodmart\" " |
| + "where \"store_state\" = 'WA'"; |
| String expectedSubExplain = " BindableAggregate(group=[{}], EXPR$0=[COUNT($0)])\n" |
| + " BindableAggregate(group=[{1}])"; |
| |
| testCountWithApproxDistinct(true, sql, expectedSubExplain); |
| testCountWithApproxDistinct(false, sql, expectedSubExplain); |
| } |
| |
| /** |
| * Test to ensure that a count on a metric does not get pushed into Druid |
| */ |
| @Test public void testCountOnMetric() { |
| String sql = "select \"brand_name\", count(\"store_sales\") from \"foodmart\" " |
| + "group by \"brand_name\""; |
| String expectedSubExplain = " BindableAggregate(group=[{0}], EXPR$1=[COUNT($1)])\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000/" |
| + "2992-01-10T00:00:00.000]], projects=[[$2, $90]])"; |
| |
| testCountWithApproxDistinct(true, sql, expectedSubExplain); |
| testCountWithApproxDistinct(false, sql, expectedSubExplain); |
| } |
| |
| /** |
| * Test to ensure that count(*) is pushed into Druid |
| */ |
| @Test public void testCountStar() { |
| String sql = "select count(*) from \"foodmart\""; |
| String expectedSubExplain = " DruidQuery(table=[[foodmart, foodmart]], " |
| + "intervals=[[1900-01-09T00:00:00.000/2992-01-10T00:00:00.000]], " |
| + "projects=[[]], groups=[{}], aggs=[[COUNT()]])"; |
| |
| sql(sql).explainContains(expectedSubExplain); |
| } |
| |
| /** |
| * Test to ensure that count() aggregates with metric columns are not pushed into Druid |
| * even when the metric column has been renamed |
| */ |
| @Test public void testCountOnMetricRenamed() { |
| String sql = "select \"B\", count(\"A\") from " |
| + "(select \"unit_sales\" as \"A\", \"customer_id\" as \"B\" from \"foodmart\") " |
| + "group by \"B\""; |
| String expectedSubExplain = " BindableAggregate(group=[{0}], EXPR$1=[COUNT($1)])\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:00.000" |
| + "/2992-01-10T00:00:00.000]], projects=[[$20, $89]])\n"; |
| |
| testCountWithApproxDistinct(true, sql, expectedSubExplain); |
| testCountWithApproxDistinct(false, sql, expectedSubExplain); |
| } |
| |
| @Test public void testDistinctCountOnMetricRenamed() { |
| String sql = "select \"B\", count(distinct \"A\") from " |
| + "(select \"unit_sales\" as \"A\", \"customer_id\" as \"B\" from \"foodmart\") " |
| + "group by \"B\""; |
| String expectedSubExplain = " BindableAggregate(group=[{0}], EXPR$1=[COUNT($1)])\n" |
| + " DruidQuery(table=[[foodmart, foodmart]], intervals=[[1900-01-09T00:00:" |
| + "00.000/2992-01-10T00:00:00.000]], projects=[[$20, $89]], groups=[{0, 1}], " |
| + "aggs=[[]])"; |
| |
| testCountWithApproxDistinct(true, sql, expectedSubExplain); |
| testCountWithApproxDistinct(false, sql, expectedSubExplain); |
| } |
| |
  /** Shorthand for
   * {@link #testCountWithApproxDistinct(boolean, String, String, String)}
   * that checks the plan only, passing an empty Druid-query fragment. */
  private void testCountWithApproxDistinct(boolean approx, String sql, String expectedExplain) {
    testCountWithApproxDistinct(approx, sql, expectedExplain, "");
  }
| |
  /** Runs {@code sql} against the foodmart model with the
   * {@code approximateDistinctCount} connection property set to
   * {@code approx}, then checks both the plan and the generated Druid query.
   *
   * @param approx whether approximate distinct-count results are acceptable
   * @param sql SQL query to execute
   * @param expectedExplain fragment that must appear in the query plan
   * @param expectedDruidQuery fragment that must appear in the Druid JSON
   *     query (the 3-arg overload passes an empty string)
   */
  private void testCountWithApproxDistinct(boolean approx, String sql, String expectedExplain,
      String expectedDruidQuery) {
    CalciteAssert.that()
        .enable(enabled())
        .with(ImmutableMap.of("model", FOODMART.getPath()))
        .with(CalciteConnectionProperty.APPROXIMATE_DISTINCT_COUNT.camelName(), approx)
        .query(sql)
        .runs()
        .explainContains(expectedExplain)
        .queryContains(druidChecker(expectedDruidQuery));
  }
| } |
| |
| // End DruidAdapterIT.java |