// blob: 61816b82d253b1785ee228d22aa5db4a45d14b6a [file] [log] [blame]
/*
 * Description : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard function of its interest lists.
 *               Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
 *               We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index.
 *               We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary.
 * Success     : Yes
 */
// Recreate the `test` dataverse from scratch so the script does not depend on
// anything left behind by an earlier run.
drop dataverse test if exists;
create dataverse test;
// Resolve the unqualified type, dataset and index names below against `test`.
use dataverse test;
// Closed record type for a postal address. It is used as the optional
// `address` field of CustomerType.
create type AddressType as closed {
  number: int32,
  street: string,
  city: string
}
// Closed record type for the records stored in the Customers dataset.
// Fields whose type ends in `?` are optional.
create type CustomerType as closed {
  // Customer id; Customers is partitioned by this key.
  cid: int32,
  name: string,
  age: int32?,
  address: AddressType?,
  // Ordered list of strings; the keyword index and the similarity-jaccard
  // join both operate on this field.
  interests: [string],
  // Ordered list of inline (anonymous) records, one per child.
  children: [ { name: string, age: int32? } ]
}
// Customers holds CustomerType records and is keyed on cid.
create dataset Customers(CustomerType) partitioned by key cid;
// Keyword index over the interests list. Per the description at the top of this
// file, this is the index the similarity join is expected to be rewritten to use.
create index interests_index on Customers(interests) type keyword;
// Direct the result of the query that follows to this file on nc1.
write output to nc1:"rttest/inverted-index-join-noeqjoin_olist-jaccard-inline.adm";
// Fuzzy self join of Customers on the Jaccard similarity of the interests lists.
//  - $a and $b both range over Customers.
//  - The similarity is bound to $jacc with a let (rather than written directly in
//    the where clause); the file description says the test checks that this
//    variable is inlined so the select can be pushed into the join.
//  - The indexnl hint on the similarity call requests an index nested-loop join.
//  - The where clause keeps pairs with similarity >= 0.7. Since cid is the dataset
//    key, $a.cid < $b.cid removes self-pairs and returns each unordered pair once.
//  - Each result record carries both interests lists and the computed similarity.
for $a in dataset('Customers')
for $b in dataset('Customers')
let $jacc := /*+ indexnl */ similarity-jaccard($a.interests, $b.interests)
where $jacc >= 0.7f and $a.cid < $b.cid
return {"ainterests": $a.interests, "binterests": $b.interests, "jacc": $jacc }