/* blob: 19415d5995688d4a5dbd2a13ebdcc57e3cf85d04 [file] [log] [blame] */
/*
 * Start from a clean slate: drop the fuzzyjoin dataverse if it already
 * exists, recreate it, and make it the current dataverse for the
 * statements that follow.
 */
drop dataverse fuzzyjoin if exists;
create dataverse fuzzyjoin;
use dataverse fuzzyjoin;
/*
 * Record type for the Users dataset.
 *   uid             - int32; used as the partitioning key of dataset Users
 *   name            - string
 *   lottery_numbers - list of int32; the field compared by the fuzzy join
 *   interests       - collection of strings
 *                     (NOTE(review): `<string>` is presumably the unordered-list
 *                     syntax of this AQL version - confirm)
 * Declared `as open`.
 */
create type UserType as open {
    uid: int32,
    name: string,
    lottery_numbers: [int32],
    interests: <string>
}
/*
 * Record type for the Visitors dataset. Same field layout as UserType,
 * except that the identifier field is named vid.
 *   vid             - int32; used as the partitioning key of dataset Visitors
 *   name            - string
 *   lottery_numbers - list of int32; the field compared by the fuzzy join
 *   interests       - collection of strings
 *                     (NOTE(review): `<string>` is presumably the unordered-list
 *                     syntax of this AQL version - confirm)
 * Declared `as open`.
 */
create type VisitorType as open {
    vid: int32,
    name: string,
    lottery_numbers: [int32],
    interests: <string>
}
/*
 * Storage layout: node group group1 spans nodes nc1 and nc2 (created only
 * if it does not already exist). Both datasets live on that group, each
 * partitioned by its integer id field (uid for Users, vid for Visitors).
 */
create nodegroup group1 if not exists on nc1, nc2;
create dataset Users(UserType) partitioned by key uid on group1;
create dataset Visitors(VisitorType) partitioned by key vid on group1;
/*
 * Populate both datasets from JSON files read on node nc1.
 * NOTE(review): the paths are relative - presumably resolved against the
 * test harness working directory on nc1; confirm.
 */
load dataset Users from nc1:'data/users-visitors-small/users.json';
load dataset Visitors from nc1:'data/users-visitors-small/visitors.json';
/* Query results are written to this file on node nc1. */
write output to nc1:'rttest/fuzzyjoin_user-vis-lot-aqlplus_4.adm';
/*
 * Similarity configuration: Jaccard as the similarity function, with
 * threshold '.6f' (presumably the float literal 0.6 - confirm).
 * NOTE(review): these settings are expected to govern the `~=` operator
 * used in the query that follows; see the AsterixDB similarity-query docs.
 */
set simfunction 'Jaccard';
set simthreshold '.6f';
/*
 * Fuzzy join of Users against Visitors on lottery_numbers.
 * Every (user, visitor) pair whose lottery_numbers lists satisfy the `~=`
 * similarity predicate is emitted as a record holding both full records.
 * Results are ordered by uid, then vid.
 */
for $user in dataset('Users')
for $visitor in dataset('Visitors')
where
    $user.lottery_numbers ~= $visitor.lottery_numbers
order by
    $user.uid,
    $visitor.vid
return {
    'user': $user,
    'visitor': $visitor
}