blob: 1dddf5e7d6b5599ae20b8f5e2a3e71c1b1eccbbe [file] [log] [blame]
/*
* Description : Fuzzy-joins two datasets, Customers and Customers2, based on ~= using the edit distance of their interest lists.
* Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
* Success : Yes
*/
/* Start from a clean slate: drop any existing `test` dataverse, recreate it, and make it current. */
drop dataverse test if exists;
create dataverse test;
use dataverse test;
/* Closed record type for a postal address; referenced by CustomerType.address. */
create type AddressType as closed {
  number: int32,
  street: string,
  city: string
}
/*
 * Closed record type shared by the Customers and Customers2 datasets.
 * Fields suffixed with `?` (age, address, children[].age) are optional.
 * `interests` is the list of strings that the fuzzy join compares.
 */
create type CustomerType as closed {
  cid: int32,
  name: string,
  age: int32?,
  address: AddressType?,
  interests: [string],
  children: [
    {
      name: string,
      age: int32?
    }
  ]
}
/* The two join inputs: same record type, both partitioned on the cid key. */
create dataset Customers(CustomerType)
  partitioned by key cid;
create dataset Customers2(CustomerType)
  partitioned by key cid;

/* Keyword index on Customers.interests; this script creates no index on Customers2. */
create index interests_index
  on Customers(interests)
  type keyword;
/*
 * Result file for this test.
 * NOTE(review): the file name mentions "jaccard_01" although the similarity
 * function configured below is edit-distance; this looks like a copy-paste
 * leftover. Confirm nothing depends on this path before renaming it.
 */
write output to nc1:"rttest/inverted-index-join_olist-fuzzyeq-jaccard_01.adm";
/* Configure the ~= operator used by the query: edit distance with a threshold of 3. */
set simfunction 'edit-distance';
set simthreshold '3';
/*
 * Fuzzy join: pair every Customers record with every Customers2 record whose
 * interests list is similar under ~= and whose cid is strictly greater.
 * Each match is returned as a record holding both sides ("arec" from
 * Customers, "brec" from Customers2).
 */
for $cust1 in dataset('Customers')
for $cust2 in dataset('Customers2')
where $cust1.interests ~= $cust2.interests
  and $cust1.cid < $cust2.cid
return {
  "arec": $cust1,
  "brec": $cust2
}