testdata/workloads/functional-planner/queries/PlannerTest/card-inner-join.test - impala - Git at Google

 # Join cardinality tests
 # Each has a qualitative description of the quantitative math
 #
 # Simplest join: Cartesian product
 # card = |T1| * |T2|
 select m.id, d.id
 from functional.alltypestiny m,
      functional.alltypessmall d
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:NESTED LOOP JOIN [CROSS JOIN]
 |  row-size=8B cardinality=800
 |
 |--00:SCAN HDFS [functional.alltypestiny m]
 |     partitions=4/4 files=4 size=460B
 |     row-size=4B cardinality=8
 |
 01:SCAN HDFS [functional.alltypessmall d]
    partitions=4/4 files=4 size=6.32KB
    row-size=4B cardinality=100
 ====
 # Cartesian product between table and a no-stats, 0-row table
 select a.id, b.id
 from functional.alltypestiny a,
      functional.alltypesnopart b
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:NESTED LOOP JOIN [CROSS JOIN]
 |  row-size=8B cardinality=0
 |
 |--01:SCAN HDFS [functional.alltypesnopart b]
 |     partitions=1/1 files=0 size=0B
 |     row-size=4B cardinality=0
 |
 00:SCAN HDFS [functional.alltypestiny a]
    partitions=4/4 files=4 size=460B
    row-size=4B cardinality=8
 ====
 # Cartesian product between two empty tables
 select a.id, b.id
 from functional.alltypesnopart a,
      functional.alltypesnopart b
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:NESTED LOOP JOIN [CROSS JOIN]
 |  row-size=8B cardinality=0
 |
 |--01:SCAN HDFS [functional.alltypesnopart b]
 |     partitions=1/1 files=0 size=0B
 |     row-size=4B cardinality=0
 |
 00:SCAN HDFS [functional.alltypesnopart a]
    partitions=1/1 files=0 size=0B
    row-size=4B cardinality=0
 ====
 # Simplest M:1 join, join between a master table (M) and a detail table (D)
 # (small):(tiny)
 # |join| = |D|
 select m.id, d.id
 from functional.alltypestiny m,
      functional.alltypessmall d
 where m.id = d.id
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.id = m.id
 |  runtime filters: RF000 <- m.id
 |  row-size=8B cardinality=9
 |
 |--00:SCAN HDFS [functional.alltypestiny m]
 |     partitions=4/4 files=4 size=460B
 |     row-size=4B cardinality=8
 |
 01:SCAN HDFS [functional.alltypessmall d]
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> d.id
    row-size=4B cardinality=100
 ====
 # Same, but reverse WHERE order
 select m.id, d.id
 from functional.alltypestiny m,
      functional.alltypessmall d
 where d.id = m.id
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.id = m.id
 |  runtime filters: RF000 <- m.id
 |  row-size=8B cardinality=9
 |
 |--00:SCAN HDFS [functional.alltypestiny m]
 |     partitions=4/4 files=4 size=460B
 |     row-size=4B cardinality=8
 |
 01:SCAN HDFS [functional.alltypessmall d]
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> d.id
    row-size=4B cardinality=100
 ====
 # Same, but use JOIN ... ON
 select m.id, d.id
 from functional.alltypestiny m
 join functional.alltypessmall d on m.id = d.id
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.id = m.id
 |  runtime filters: RF000 <- m.id
 |  row-size=8B cardinality=9
 |
 |--00:SCAN HDFS [functional.alltypestiny m]
 |     partitions=4/4 files=4 size=460B
 |     row-size=4B cardinality=8
 |
 01:SCAN HDFS [functional.alltypessmall d]
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> d.id
    row-size=4B cardinality=100
 ====
 # Same, reverse ON order
 select m.id, d.id
 from functional.alltypestiny m
 join functional.alltypessmall d on d.id = m.id
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.id = m.id
 |  runtime filters: RF000 <- m.id
 |  row-size=8B cardinality=9
 |
 |--00:SCAN HDFS [functional.alltypestiny m]
 |     partitions=4/4 files=4 size=460B
 |     row-size=4B cardinality=8
 |
 01:SCAN HDFS [functional.alltypessmall d]
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> d.id
    row-size=4B cardinality=100
 ====
 # Same, but reverse FROM order
 select m.id, d.id
 from functional.alltypessmall d,
      functional.alltypestiny m
 where m.id = d.id
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.id = m.id
 |  runtime filters: RF000 <- m.id
 |  row-size=8B cardinality=9
 |
 |--01:SCAN HDFS [functional.alltypestiny m]
 |     partitions=4/4 files=4 size=460B
 |     row-size=4B cardinality=8
 |
 00:SCAN HDFS [functional.alltypessmall d]
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> d.id
    row-size=4B cardinality=100
 ====
 # Same, but use JOIN ... ON
 select m.id, d.id
 from functional.alltypessmall m
 join functional.alltypestiny d on m.id = d.id
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: m.id = d.id
 |  runtime filters: RF000 <- d.id
 |  row-size=8B cardinality=9
 |
 |--01:SCAN HDFS [functional.alltypestiny d]
 |     partitions=4/4 files=4 size=460B
 |     row-size=4B cardinality=8
 |
 00:SCAN HDFS [functional.alltypessmall m]
    partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> m.id
    row-size=4B cardinality=100
 ====
 # Similar, but with big tables
 # |M| = |customer| = 150K
 # |M.pk| = |customer.c_custkey| = |customer| = 150K
 # |D| = |orders| = 1.5M
 # |D.fk| = |orders.o_custkey| = 100K
 # |join| = |D| = |orders|
 select m.c_custkey, d.o_orderkey
 from tpch.customer m,
      tpch.orders d
 where m.c_custkey = d.o_custkey
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.o_custkey = m.c_custkey
 |  runtime filters: RF000 <- m.c_custkey
 |  row-size=24B cardinality=1.50M
 |
 |--00:SCAN HDFS [tpch.customer m]
 |     partitions=1/1 files=1 size=23.08MB
 |     row-size=8B cardinality=150.00K
 |
 01:SCAN HDFS [tpch.orders d]
    partitions=1/1 files=1 size=162.56MB
    runtime filters: RF000 -> d.o_custkey
    row-size=16B cardinality=1.50M
 ====
 # Simple join between table and a no-stats, 0-row table
 # Bug: expected cardinality = 0
 select a.id, b.id
 from functional.alltypestiny a,
      functional.alltypesnopart b
 WHERE a.id = b.id
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = b.id
 |  runtime filters: RF000 <- b.id
 |  row-size=8B cardinality=8
 |
 |--01:SCAN HDFS [functional.alltypesnopart b]
 |     partitions=1/1 files=0 size=0B
 |     row-size=4B cardinality=0
 |
 00:SCAN HDFS [functional.alltypestiny a]
    partitions=4/4 files=4 size=460B
    runtime filters: RF000 -> a.id
    row-size=4B cardinality=8
 ====
 # Cartesian product between two empty tables
 select a.id, b.id
 from functional.alltypesnopart a,
      functional.alltypesnopart b
 WHERE a.id = b.id
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = b.id
 |  runtime filters: RF000 <- b.id
 |  row-size=8B cardinality=0
 |
 |--01:SCAN HDFS [functional.alltypesnopart b]
 |     partitions=1/1 files=0 size=0B
 |     row-size=4B cardinality=0
 |
 00:SCAN HDFS [functional.alltypesnopart a]
    partitions=1/1 files=0 size=0B
    runtime filters: RF000 -> a.id
    row-size=4B cardinality=0
 ====
 # Selective master filtering
 # |join| = |D|/|D.fk| = 15
 select m.c_custkey, d.o_orderkey
 from tpch.customer m,
      tpch.orders d
 where m.c_custkey = d.o_custkey
   and m.c_name = 'foo'
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.o_custkey = m.c_custkey
 |  runtime filters: RF000 <- m.c_custkey
 |  row-size=54B cardinality=16
 |
 |--00:SCAN HDFS [tpch.customer m]
 |     partitions=1/1 files=1 size=23.08MB
 |     predicates: m.c_name = 'foo'
 |     row-size=38B cardinality=1
 |
 01:SCAN HDFS [tpch.orders d]
    partitions=1/1 files=1 size=162.56MB
    runtime filters: RF000 -> d.o_custkey
    row-size=16B cardinality=1.50M
 ====
 # Selective master filtering, column common with detail
 # |join| = |D|/|D.fk| = 15
 # Bug: IMPALA-8014, expected cardinality ~15
 select m.c_custkey, d.o_orderkey
 from tpch.customer m,
      tpch.orders d
 where m.c_custkey = d.o_custkey
   and m.c_custkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.o_custkey = m.c_custkey
 |  runtime filters: RF000 <- m.c_custkey
 |  row-size=24B cardinality=1
 |
 |--00:SCAN HDFS [tpch.customer m]
 |     partitions=1/1 files=1 size=23.08MB
 |     predicates: m.c_custkey = 10
 |     row-size=8B cardinality=1
 |
 01:SCAN HDFS [tpch.orders d]
    partitions=1/1 files=1 size=162.56MB
    predicates: d.o_custkey = 10
    runtime filters: RF000 -> d.o_custkey
    row-size=16B cardinality=15
 ====
 # Selective detail filtering
 # Card = 1: one detail record finds its master record
 select m.c_custkey, d.o_orderkey
 from tpch.customer m,
      tpch.orders d
 where m.c_custkey = d.o_custkey
   and d.o_orderkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: m.c_custkey = d.o_custkey
 |  runtime filters: RF000 <- d.o_custkey
 |  row-size=24B cardinality=1
 |
 |--01:SCAN HDFS [tpch.orders d]
 |     partitions=1/1 files=1 size=162.56MB
 |     predicates: d.o_orderkey = 10
 |     row-size=16B cardinality=1
 |
 00:SCAN HDFS [tpch.customer m]
    partitions=1/1 files=1 size=23.08MB
    runtime filters: RF000 -> m.c_custkey
    row-size=8B cardinality=150.00K
 ====
 # Broad master filtering
 # |M'| = |M| * 0.33 = 50K
 # Each master finds all its details
 # |join|= |M'| * |D|/|D.fk| = 50K * 15 = 750K
 # Though |M.pk| > |D.fk|, we assume that filtering eliminated the unmatched keys
 # Bug: Several, expected cardinality ~750K
 select m.c_custkey, d.o_orderkey
 from tpch.customer m,
      tpch.orders d
 where m.c_custkey = d.o_custkey
   and m.c_name < 'foo'
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.o_custkey = m.c_custkey
 |  runtime filters: RF000 <- m.c_custkey
 |  row-size=54B cardinality=228.68K
 |
 |--00:SCAN HDFS [tpch.customer m]
 |     partitions=1/1 files=1 size=23.08MB
 |     predicates: m.c_name < 'foo'
 |     row-size=38B cardinality=15.00K
 |
 01:SCAN HDFS [tpch.orders d]
    partitions=1/1 files=1 size=162.56MB
    runtime filters: RF000 -> d.o_custkey
    row-size=16B cardinality=1.50M
 ====
 # Broad master filtering, column common with detail
 # |D'| = |D| * 0.33 = 500K
 # Each detail finds its master
 # |join| = |D'| = 500K
 # Bug: Expected cardinality ~500K
 select m.c_custkey, d.o_orderkey
 from tpch.customer m,
      tpch.orders d
 where m.c_custkey = d.o_custkey
   and m.c_custkey < 1234
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.o_custkey = m.c_custkey
 |  runtime filters: RF000 <- m.c_custkey
 |  row-size=24B cardinality=22.87K
 |
 |--00:SCAN HDFS [tpch.customer m]
 |     partitions=1/1 files=1 size=23.08MB
 |     predicates: m.c_custkey < 1234
 |     row-size=8B cardinality=15.00K
 |
 01:SCAN HDFS [tpch.orders d]
    partitions=1/1 files=1 size=162.56MB
    predicates: d.o_custkey < 1234
    runtime filters: RF000 -> d.o_custkey
    row-size=16B cardinality=150.00K
 ====
 # Broad detail filtering
 # |D'| = |D| * 0.33 = 500K
 # Every detail finds its master
 # Card = 500K
 # Bug: Expected cardinlity ~500K
 select m.c_custkey, d.o_orderkey
 from tpch.customer m,
      tpch.orders d
 where m.c_custkey = d.o_custkey
   and d.o_orderkey < 1234
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.o_custkey = m.c_custkey
 |  runtime filters: RF000 <- m.c_custkey
 |  row-size=24B cardinality=150.00K
 |
 |--00:SCAN HDFS [tpch.customer m]
 |     partitions=1/1 files=1 size=23.08MB
 |     row-size=8B cardinality=150.00K
 |
 01:SCAN HDFS [tpch.orders d]
    partitions=1/1 files=1 size=162.56MB
    predicates: d.o_orderkey < 1234
    runtime filters: RF000 -> d.o_custkey
    row-size=16B cardinality=150.00K
 ====
 # Filtering on join between table and a no-stats, 0-row table
 # Inner join with a zero-sized table produces 0 rows.
 select a.id, b.id
 from functional.alltypestiny a,
      functional.alltypesnopart b
 WHERE a.id = b.id
   AND a.id = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = b.id
 |  runtime filters: RF000 <- b.id
 |  row-size=8B cardinality=1
 |
 |--01:SCAN HDFS [functional.alltypesnopart b]
 |     partitions=1/1 files=0 size=0B
 |     predicates: b.id = 10
 |     row-size=4B cardinality=0
 |
 00:SCAN HDFS [functional.alltypestiny a]
    partitions=4/4 files=4 size=460B
    predicates: a.id = 10
    runtime filters: RF000 -> a.id
    row-size=4B cardinality=1
 ====
 # Filtering on join between table and a no-stats, 0-row table
 # Bug: Expected cardinality ~1
 select a.id, b.id
 from functional.alltypestiny a,
      functional.alltypesnopart b
 WHERE a.id = b.id
   AND b.id = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = b.id
 |  runtime filters: RF000 <- b.id
 |  row-size=8B cardinality=1
 |
 |--01:SCAN HDFS [functional.alltypesnopart b]
 |     partitions=1/1 files=0 size=0B
 |     predicates: b.id = 10
 |     row-size=4B cardinality=0
 |
 00:SCAN HDFS [functional.alltypestiny a]
    partitions=4/4 files=4 size=460B
    predicates: a.id = 10
    runtime filters: RF000 -> a.id
    row-size=4B cardinality=1
 ====
 # Cartesian product between two empty tables
 select a.id, b.id
 from functional.alltypesnopart a,
      functional.alltypesnopart b
 WHERE a.id = b.id
   AND a.id = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = b.id
 |  runtime filters: RF000 <- b.id
 |  row-size=8B cardinality=0
 |
 |--01:SCAN HDFS [functional.alltypesnopart b]
 |     partitions=1/1 files=0 size=0B
 |     predicates: b.id = 10
 |     row-size=4B cardinality=0
 |
 00:SCAN HDFS [functional.alltypesnopart a]
    partitions=1/1 files=0 size=0B
    predicates: a.id = 10
    runtime filters: RF000 -> a.id
    row-size=4B cardinality=0
 ====
 # Selective filtering on both sides
 # |D'| = 1
 # |C'| = 1
 # |D' >< C'| = 0 or 1, depending whether we're lucky
 select m.c_custkey, d.o_orderkey
 from tpch.customer m,
      tpch.orders d
 where m.c_custkey = d.o_custkey
   and m.c_name = 'foo'
   and d.o_orderkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: m.c_custkey = d.o_custkey
 |  runtime filters: RF000 <- d.o_custkey
 |  row-size=54B cardinality=1
 |
 |--01:SCAN HDFS [tpch.orders d]
 |     partitions=1/1 files=1 size=162.56MB
 |     predicates: d.o_orderkey = 10
 |     row-size=16B cardinality=1
 |
 00:SCAN HDFS [tpch.customer m]
    partitions=1/1 files=1 size=23.08MB
    predicates: m.c_name = 'foo'
    runtime filters: RF000 -> m.c_custkey
    row-size=38B cardinality=1
 ====
 # Correlated filtering on master
 # |join| = |D| / |D.fk|
 # Bug: Expected cardinality 15
 select m.c_custkey, d.o_orderkey
 from tpch.customer m,
      tpch.orders d
 where m.c_custkey = d.o_custkey
   and m.c_custkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.o_custkey = m.c_custkey
 |  runtime filters: RF000 <- m.c_custkey
 |  row-size=24B cardinality=1
 |
 |--00:SCAN HDFS [tpch.customer m]
 |     partitions=1/1 files=1 size=23.08MB
 |     predicates: m.c_custkey = 10
 |     row-size=8B cardinality=1
 |
 01:SCAN HDFS [tpch.orders d]
    partitions=1/1 files=1 size=162.56MB
    predicates: d.o_custkey = 10
    runtime filters: RF000 -> d.o_custkey
    row-size=16B cardinality=15
 ====
 # Correlated filtering on detail
 # |join| = |D| / |D.fk|
 # Bug: Expected cardinality 15
 select m.c_custkey, d.o_orderkey
 from tpch.customer m,
      tpch.orders d
 where m.c_custkey = d.o_custkey
   and d.o_custkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.o_custkey = m.c_custkey
 |  runtime filters: RF000 <- m.c_custkey
 |  row-size=24B cardinality=1
 |
 |--00:SCAN HDFS [tpch.customer m]
 |     partitions=1/1 files=1 size=23.08MB
 |     predicates: m.c_custkey = 10
 |     row-size=8B cardinality=1
 |
 01:SCAN HDFS [tpch.orders d]
    partitions=1/1 files=1 size=162.56MB
    predicates: d.o_custkey = 10
    runtime filters: RF000 -> d.o_custkey
    row-size=16B cardinality=15
 ====
 # Redundant correlated filtering on both sides
 # Same as above case internally
 # Bug: Expected cardinality ~1
 select m.c_custkey, d.o_orderkey
 from tpch.customer m,
      tpch.orders d
 where m.c_custkey = d.o_custkey
   and m.c_name = 'foo'
   and m.c_custkey = 10
   and d.o_custkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.o_custkey = m.c_custkey
 |  runtime filters: RF000 <- m.c_custkey
 |  row-size=54B cardinality=1
 |
 |--00:SCAN HDFS [tpch.customer m]
 |     partitions=1/1 files=1 size=23.08MB
 |     predicates: m.c_custkey = 10, m.c_name = 'foo'
 |     row-size=38B cardinality=1
 |
 01:SCAN HDFS [tpch.orders d]
    partitions=1/1 files=1 size=162.56MB
    predicates: d.o_custkey = 10
    runtime filters: RF000 -> d.o_custkey
    row-size=16B cardinality=15
 ====
 # Selective filtering on master, broad on detail
 # |M'| = 1
 # |D'| = |D| * 0.33, uncorrelated with above
 # |D.fk'| = |D.fk| * 0.33
 # Since we assume containment: that the smaller key set
 # is a subset of the larger one.
 # Card = |D'| / max(1, |D.fk'|) = 5
 # Bug: Expected cardinality ~5
 select m.c_custkey, d.o_orderkey
 from tpch.customer m,
      tpch.orders d
 where m.c_custkey = d.o_custkey
   and m.c_name = 'foo'
   and d.o_orderkey < 1234
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: d.o_custkey = m.c_custkey
 |  runtime filters: RF000 <- m.c_custkey
 |  row-size=54B cardinality=2
 |
 |--00:SCAN HDFS [tpch.customer m]
 |     partitions=1/1 files=1 size=23.08MB
 |     predicates: m.c_name = 'foo'
 |     row-size=38B cardinality=1
 |
 01:SCAN HDFS [tpch.orders d]
    partitions=1/1 files=1 size=162.56MB
    predicates: d.o_orderkey < 1234
    runtime filters: RF000 -> d.o_custkey
    row-size=16B cardinality=150.00K
 ====
 # Selective filtering on detail, broad on master
 # |M'| = |M| * 0.33
 # |D'| = 1
 # 1/3 chance that the detail finds its master, so card = 0 or 1
 select m.c_custkey, d.o_orderkey
 from tpch.customer m,
      tpch.orders d
 where m.c_custkey = d.o_custkey
   and m.c_name < 'foo'
   and d.o_orderkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: m.c_custkey = d.o_custkey
 |  runtime filters: RF000 <- d.o_custkey
 |  row-size=54B cardinality=1
 |
 |--01:SCAN HDFS [tpch.orders d]
 |     partitions=1/1 files=1 size=162.56MB
 |     predicates: d.o_orderkey = 10
 |     row-size=16B cardinality=1
 |
 00:SCAN HDFS [tpch.customer m]
    partitions=1/1 files=1 size=23.08MB
    predicates: m.c_name < 'foo'
    runtime filters: RF000 -> m.c_custkey
    row-size=38B cardinality=15.00K
 ====
 # M:N join
 # |partsupp| = 800K
 # |lineitem| = 6M
 # NDV(ps_suppkey) = NDV(l_suppkey) = |suppkey| = 9.7K
 # |lineitem >< partsupp| = (|lineitem| * |partsupp|) / |suppkey|
 #                        = 800K * 6M / 10K = 80 * 600 = 480M
 select t1.ps_suppkey, t2.l_suppkey
 from tpch.partsupp t1,
      tpch.lineitem t2
 where t1.ps_suppkey = t2.l_suppkey
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t2.l_suppkey = t1.ps_suppkey
 |  runtime filters: RF000 <- t1.ps_suppkey
 |  row-size=16B cardinality=494.33M
 |
 |--00:SCAN HDFS [tpch.partsupp t1]
 |     partitions=1/1 files=1 size=112.71MB
 |     row-size=8B cardinality=800.00K
 |
 01:SCAN HDFS [tpch.lineitem t2]
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF000 -> t2.l_suppkey
    row-size=8B cardinality=6.00M
 ====
 # M:N join with filtering on smaller table
 # NDV(availqty) = = |availqty| = 10K
 # |partsupp'| = |partsupp| / |availqty|
 #             = 800K / 10K = 80
 # |lineitem >< partsupp'| = |lineitem| * |partsupp'| / |suppkey|
 #                         = 6M * 80 / 10K = 48K
 select t1.ps_suppkey, t2.l_suppkey
 from tpch.partsupp t1,
      tpch.lineitem t2
 where t1.ps_suppkey = t2.l_suppkey
   and t1.ps_availqty = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t2.l_suppkey = t1.ps_suppkey
 |  runtime filters: RF000 <- t1.ps_suppkey
 |  row-size=20B cardinality=48.20K
 |
 |--00:SCAN HDFS [tpch.partsupp t1]
 |     partitions=1/1 files=1 size=112.71MB
 |     predicates: t1.ps_availqty = 10
 |     row-size=12B cardinality=78
 |
 01:SCAN HDFS [tpch.lineitem t2]
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF000 -> t2.l_suppkey
    row-size=8B cardinality=6.00M
 ====
 # M:N join with filtering on larger table
 # NDV(l_partkey) = |l_partkey| = 200K
 # |lineitem'| = |lineitem| / |l_partkey| = 6M / 200K = 30
 # |lineitem' >< partsupp| = |lineitem'| * |partsupp| / |suppkey|
 #                         = 30 * 800K / 10K = 2400
 select t1.ps_suppkey, t2.l_suppkey
 from tpch.partsupp t1,
      tpch.lineitem t2
 where t1.ps_suppkey = t2.l_suppkey
   and t2.l_partkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t1.ps_suppkey = t2.l_suppkey
 |  runtime filters: RF000 <- t2.l_suppkey
 |  row-size=24B cardinality=2.47K
 |
 |--01:SCAN HDFS [tpch.lineitem t2]
 |     partitions=1/1 files=1 size=718.94MB
 |     predicates: t2.l_partkey = 10
 |     row-size=16B cardinality=30
 |
 00:SCAN HDFS [tpch.partsupp t1]
    partitions=1/1 files=1 size=112.71MB
    runtime filters: RF000 -> t1.ps_suppkey
    row-size=8B cardinality=800.00K
 ====
 # M:N join with selective filtering on shared column
 # |partsupp'| = |partsupp| / |suppkey| = 800K / 10K = 80
 # But, filtering is done on the join column, so we want:
 # |lineitem >< partsupp'| = |lineitem| * |partsupp'|
 #                         = (80 * 6M) / 10K = 80 * 600 = 48K
 select t1.ps_suppkey, t2.l_suppkey
 from tpch.partsupp t1,
      tpch.lineitem t2
 where t1.ps_suppkey = t2.l_suppkey
   and t1.ps_suppkey = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t2.l_suppkey = t1.ps_suppkey
 |  runtime filters: RF000 <- t1.ps_suppkey
 |  row-size=16B cardinality=50.67K
 |
 |--00:SCAN HDFS [tpch.partsupp t1]
 |     partitions=1/1 files=1 size=112.71MB
 |     predicates: t1.ps_suppkey = 10
 |     row-size=8B cardinality=82
 |
 01:SCAN HDFS [tpch.lineitem t2]
    partitions=1/1 files=1 size=718.94MB
    predicates: t2.l_suppkey = 10
    runtime filters: RF000 -> t2.l_suppkey
    row-size=8B cardinality=618
 ====
 # M:N join with broad filtering on smaller table
 # Most general join case
 # |partsupp'| = |partsupp| * 0.33
 #             = 800K / 3 = 267K
 # |lineietem >< partsupp'| = |lineitem| * |partsupp'| / |suppkey|
 #             = 267K * 6M / 10K = 160M
 # Bug: Expected cardinality ~160M
 select t1.ps_suppkey, t2.l_suppkey
 from tpch.partsupp t1,
      tpch.lineitem t2
 where t1.ps_suppkey = t2.l_suppkey
   and t1.ps_availqty < 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t2.l_suppkey = t1.ps_suppkey
 |  runtime filters: RF000 <- t1.ps_suppkey
 |  row-size=20B cardinality=49.43M
 |
 |--00:SCAN HDFS [tpch.partsupp t1]
 |     partitions=1/1 files=1 size=112.71MB
 |     predicates: t1.ps_availqty < 10
 |     row-size=12B cardinality=80.00K
 |
 01:SCAN HDFS [tpch.lineitem t2]
    partitions=1/1 files=1 size=718.94MB
    runtime filters: RF000 -> t2.l_suppkey
    row-size=8B cardinality=6.00M
 ====
 # M:N join with broad filtering on larger table
 # |lineitem'| = |lineitem| * 0.33 = 6M / 3 = 2M
 # |lineitem' >< partsupp| = |lineitem'| * |partsupp| / |suppkey|
 #             = 2M * 800K / 10K = 160M
 # Bug: Expected cardinality ~161M
 select t1.ps_suppkey, t2.l_suppkey
 from tpch.partsupp t1,
      tpch.lineitem t2
 where t1.ps_suppkey = t2.l_suppkey
   and t2.l_partkey < 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t2.l_suppkey = t1.ps_suppkey
 |  runtime filters: RF000 <- t1.ps_suppkey
 |  row-size=24B cardinality=49.43M
 |
 |--00:SCAN HDFS [tpch.partsupp t1]
 |     partitions=1/1 files=1 size=112.71MB
 |     row-size=8B cardinality=800.00K
 |
 01:SCAN HDFS [tpch.lineitem t2]
    partitions=1/1 files=1 size=718.94MB
    predicates: t2.l_partkey < 10
    runtime filters: RF000 -> t2.l_suppkey
    row-size=16B cardinality=600.12K
 ====
 # M:N join with filtering on both sides
 # Most general join case
 # |lineitem'| = |lineitem| * 0.33
 # |partsup'| = |partsupp| * 0.33
 # |suppkey'| = |suppkey| * 0.33
 # |lineitem' >< partsupp'| = |lineitem'| * |partsupp'| / |suppkey'|
 #              = 800K/3 * 6M/3 / 3.3K = 161M
 # (Recall the containment assumption above.)
 # Bug: Expected cardinality ~161M
 select t1.ps_suppkey, t2.l_suppkey
 from tpch.partsupp t1,
      tpch.lineitem t2
 where t1.ps_suppkey = t2.l_suppkey
   and t1.ps_availqty < 10
   and t2.l_partkey < 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: t2.l_suppkey = t1.ps_suppkey
 |  runtime filters: RF000 <- t1.ps_suppkey
 |  row-size=28B cardinality=49.43M
 |
 |--00:SCAN HDFS [tpch.partsupp t1]
 |     partitions=1/1 files=1 size=112.71MB
 |     predicates: t1.ps_availqty < 10
 |     row-size=12B cardinality=80.00K
 |
 01:SCAN HDFS [tpch.lineitem t2]
    partitions=1/1 files=1 size=718.94MB
    predicates: t2.l_partkey < 10
    runtime filters: RF000 -> t2.l_suppkey
    row-size=16B cardinality=600.12K
 ====
 # Join with join-level filter predicates
 # |join| = |orders| * sel(c.nationkey + o.o_shippriority = 10)
 # |orders| = 1.5M
 # sel(c.nationkey + o.o_shippriority = 10) = 0.1 (can't use NDV)
 # |join| = 1.5M * .1 = 150K
 # Bug: Several, expected cardinality ~1.5M
 select c.c_custkey, o.o_orderkey
 from tpch.customer c,
      tpch.orders o
 where c.c_custkey = o.o_custkey
   and c.c_nationkey + o.o_shippriority = 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: o.o_custkey = c.c_custkey
 |  other predicates: c.c_nationkey + o.o_shippriority = 10
 |  runtime filters: RF000 <- c.c_custkey
 |  row-size=30B cardinality=1.50M
 |
 |--00:SCAN HDFS [tpch.customer c]
 |     partitions=1/1 files=1 size=23.08MB
 |     row-size=10B cardinality=150.00K
 |
 01:SCAN HDFS [tpch.orders o]
    partitions=1/1 files=1 size=162.56MB
    runtime filters: RF000 -> o.o_custkey
    row-size=20B cardinality=1.50M
 ====
 # Join with a table that has no stats and zero rows.
 # The zero rows trumps the stats.
 # Zero-row table is on the build side, |join| = 0
 # Bug: Expected cardinality = 0
 select a.int_col, b.int_col
 from functional.alltypesagg a, functional.alltypesnopart b
 where a.id = b.id
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = b.id
 |  runtime filters: RF000 <- b.id
 |  row-size=16B cardinality=11.00K
 |
 |--01:SCAN HDFS [functional.alltypesnopart b]
 |     partitions=1/1 files=0 size=0B
 |     row-size=8B cardinality=0
 |
 00:SCAN HDFS [functional.alltypesagg a]
    partitions=11/11 files=11 size=814.73KB
    runtime filters: RF000 -> a.id
    row-size=8B cardinality=11.00K
 ====
 # Adding table and join filters should not affect the result.
 # Bug: Expected cardinality = 0
 select a.int_col, b.int_col
 from functional.alltypesagg a, functional.alltypesnopart b
 where a.id = b.id
   and a.smallint_col = 10
   and b.smallint_col = 20
   and a.int_col + b.int_col > 30
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.id = b.id
 |  other predicates: a.int_col + b.int_col > 30
 |  runtime filters: RF000 <- b.id
 |  row-size=20B cardinality=113
 |
 |--01:SCAN HDFS [functional.alltypesnopart b]
 |     partitions=1/1 files=0 size=0B
 |     predicates: b.smallint_col = 20
 |     row-size=10B cardinality=0
 |
 00:SCAN HDFS [functional.alltypesagg a]
    partitions=11/11 files=11 size=814.73KB
    predicates: a.smallint_col = 10
    runtime filters: RF000 -> a.id
    row-size=10B cardinality=113
 ====
 # Join on a computed column
 # Assumes Cartesian product * 0.1
 # |join| = 11K * 7K * 0.1 = 7M
 # Bug: Expected cardinality ~7M
 select a.id, b.id
 from functional.alltypes a, functional.alltypesagg b
 where a.id = b.id + b.int_col
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: b.id + b.int_col = a.id
 |  runtime filters: RF000 <- a.id
 |  row-size=12B cardinality=11.00K
 |
 |--00:SCAN HDFS [functional.alltypes a]
 |     partitions=24/24 files=24 size=478.45KB
 |     row-size=4B cardinality=7.30K
 |
 01:SCAN HDFS [functional.alltypesagg b]
    partitions=11/11 files=11 size=814.73KB
    runtime filters: RF000 -> b.id + b.int_col
    row-size=8B cardinality=11.00K
 ====