blob: 10e76362fca2daaec2dd52d11767d1e135a0c23b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.asterix.optimizer.rules;
import java.util.HashSet;
import java.util.Set;
import org.apache.asterix.om.functions.BuiltinFunctions;
import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import org.apache.hyracks.algebricks.rewriter.rules.PushFunctionsBelowJoin;
/**
* Pushes similarity function-call expressions below a join if possible.
* Assigns the similarity function-call expressions to new variables, and replaces the original
* expression with a corresponding variable reference expression.
* This rule can help reduce the cost of computing expensive similarity functions by pushing them below
* a join (which may blow up the cardinality).
* Also, this rule may help to enable other rules such as common subexpression elimination, again to reduce
* the number of calls to expensive similarity functions.
* Example:
* Before plan:
* assign [$$10] <- [funcA(funcB(simFuncX($$3, $$4)))]
*   join (some condition)
*     join_branch_0 where $$3 and $$4 are not live
*       ...
*     join_branch_1 where $$3 and $$4 are live
*       ...
* After plan:
* assign [$$10] <- [funcA(funcB($$11))]
*   join (some condition)
*     join_branch_0 where $$3 and $$4 are not live
*       ...
*     join_branch_1 where $$3 and $$4 are live
*       assign [$$11] <- [simFuncX($$3, $$4)]
*         ...
*/
public class PushSimilarityFunctionsBelowJoin extends PushFunctionsBelowJoin {

    /** Identifiers of the similarity functions that this rule hands to the base rule. */
    private static final Set<FunctionIdentifier> SIMILARITY_FUNCTION_IDS = collectSimilarityFunctionIds();

    /**
     * Creates the rule by passing the fixed set of similarity function identifiers
     * to the {@link PushFunctionsBelowJoin} constructor.
     */
    public PushSimilarityFunctionsBelowJoin() {
        super(SIMILARITY_FUNCTION_IDS);
    }

    /**
     * Builds the set of similarity function identifiers: the Jaccard similarity and
     * edit-distance functions together with their "check" variants.
     *
     * @return a new mutable set containing the four identifiers
     */
    private static Set<FunctionIdentifier> collectSimilarityFunctionIds() {
        Set<FunctionIdentifier> ids = new HashSet<>();
        ids.add(BuiltinFunctions.SIMILARITY_JACCARD);
        ids.add(BuiltinFunctions.SIMILARITY_JACCARD_CHECK);
        ids.add(BuiltinFunctions.EDIT_DISTANCE);
        ids.add(BuiltinFunctions.EDIT_DISTANCE_CHECK);
        return ids;
    }
}