| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.mrql; |
| |
| import org.apache.mrql.gen.*; |
| import java.util.*; |
| import java.io.*; |
| |
| |
| /** simplify algebraic forms using heuristic rewriting rules that improve performance in most cases */ |
| public class Simplification extends Normalization { |
| |
| /* true if x is functional dependent on y (ie, equal x's implies equal y's) */ |
| private static boolean functional_dependent ( Tree x, Tree y ) { |
| if (x.equals(y)) |
| return true; |
| match y { |
| case tuple(...ts): |
| for ( Tree t: ts ) |
| if (functional_dependent(x,t)) |
| return true; |
| case record(...rs): |
| for ( Tree r: rs ) |
| match r { |
| case bind(_,`t): |
| if (functional_dependent(x,t)) |
| return true; |
| } |
| }; |
| return false; |
| } |
| |
| private static boolean simple_accessor ( Tree v, Tree e ) { |
| match e { |
| case nth(`u,`n): return simple_accessor(v,u); |
| case project(`u,`a): return simple_accessor(v,u); |
| }; |
| return e.equals(v); |
| } |
| |
| private static Trees simple_accessors ( Tree v, Tree e ) { |
| if (simple_accessor(v,e)) |
| return #[`e]; |
| match e { |
| case `f(...al): |
| Trees res = #[ ]; |
| for ( Tree a: al ) |
| res = res.append(simple_accessors(v,a)); |
| return res; |
| }; |
| return #[ ]; |
| } |
| |
| private static Tree factor_out_call ( Tree e, Tree v ) { |
| match e { |
| case call(`f,...): |
| if (!free_variables(e,#[`v]).is_empty()) |
| fail; |
| return e; |
| case `f(...al): |
| for ( Tree a: al ) { |
| Tree b = factor_out_call(a,v); |
| if (b != null) |
| return b; |
| }; |
| return null; |
| }; |
| return null; |
| } |
| |
| private static boolean contains_join ( Tree x ) { |
| match x { |
| case join(...): return true; |
| case lambda(...): return false; |
| case `f(...al): |
| for ( Tree a: al ) |
| if (contains_join(a)) |
| return true; |
| }; |
| return false; |
| } |
| |
| /** Algebraic normalization (algebra to algebra) |
| * @param e algebraic expression |
| * @return an improved algebraic expression |
| */ |
| public static Tree simplify ( Tree e ) { |
| match e { |
| case cmap(`f,cmap(lambda(`x,`g),`s)): |
| return simplify(#<cmap(lambda(`x,cmap(`f,`g)),`s)>); |
| case map(`f,cmap(lambda(`x,`g),`s)): |
| return simplify(#<cmap(lambda(`x,map(`f,`g)),`s)>); |
| case cmap(`g,join(`k1,`k2,lambda(`p,`f),`X,`Y)): |
| return simplify(#<join(`k1,`k2,lambda(`p,cmap(`g,`f)),`X,`Y)>); |
| case cmap(lambda(`x,`S(`y)),`u): |
| if (is_collection(S) && x.equals(y)) |
| return simplify(u); |
| else fail |
| case cmap(lambda(`x,`b),`S(`a)): |
| if (is_collection(S) && x.is_variable()) |
| return simplify(subst_var(x,a,b)); |
| else fail |
| case cmap(`f,`S()): |
| if (is_collection(S)) |
| return #<`S()>; |
| else fail |
| case cmap(lambda(`x,`T(`b)),`S(...as)): |
| if (is_collection(S) && is_collection(T) && x.is_variable()) { |
| Trees bs = #[]; |
| for ( Tree a: as ) |
| bs = bs.append(simplify(subst_var(x,a,b))); |
| return #<`T(...bs)>; |
| } else fail |
| case map(lambda(`x,`b),`S(`a)): |
| if (is_collection(S) && x.is_variable()) |
| return #<`S(`(simplify(subst_var(x,a,b))))>; |
| else fail |
| case map(`f,`S()): |
| if (is_collection(S)) |
| return #<`S()>; |
| else fail |
| case filter(lambda(`x,`b),`m,`S(`a)): |
| if (is_collection(S) && x.is_variable()) |
| return simplify(#<if(`(subst_var(x,a,b)),apply(`m,`a),`S())>); |
| else fail |
| case filter(`p,`m,`S()): |
| if (is_collection(S)) |
| return #<`S()>; |
| else fail |
| case cmap(`f,if(`p,`x,`y)): |
| return simplify(#<if(`p,cmap(`f,`x),cmap(`f,`y))>); |
| // if the group-by key is the same as the join key, fuse the group-by into the join |
| case join(lambda(`v,nth(`v1,0)), `ky, `r, |
| groupBy(`X), `Y): |
| if (!v1.equals(v) || !Config.groupJoinOpt) |
| fail; |
| Tree nv = new_var(); |
| type_env.insert(nv.toString(),TypeInference.type_inference(#<tuple(`X,`Y)>)); |
| return simplify(#<join(lambda(`v,nth(`v1,0)), `ky, |
| lambda(`nv,apply(`r,tuple(groupBy(nth(`nv,0)),nth(`nv,1)))), |
| `X, `Y)>); |
| // ... same for the right join input |
| case join(`kx, lambda(`v,nth(`v1,0)), `r, |
| `X, groupBy(`Y)): |
| if (!v1.equals(v) || !Config.groupJoinOpt) |
| fail; |
| Tree nv = new_var(); |
| type_env.insert(nv.toString(),TypeInference.type_inference(#<tuple(`X,`Y)>)); |
| return simplify(#<join(`kx, lambda(`v,nth(`v1,0)), |
| lambda(`nv,apply(`r,tuple(nth(`nv,0),groupBy(nth(`nv,1))))), |
| `X, `Y)>); |
| // push projection before join: if the join reducer contains a function call |
| // that depends on the left input, push it to the left input |
| case join(lambda(`w,`kx),`ky, |
| lambda(`v,cmap(lambda(`v1,cmap(lambda(`v2,bag(`b)), |
| nth(`vy,1))), |
| nth(`vx,0))), |
| `X,`Y): |
| if (!vx.equals(v) || !vy.equals(v)) |
| fail; |
| Tree factor = factor_out_call(b,v1); |
| if (factor == null) |
| fail; |
| Tree nv = new_var(); |
| b = subst(factor,#<nth(`nv,1)>,b); |
| Trees rest = simple_accessors(v1,subst(factor,#<tuple()>,b)); |
| type_env.insert(nv.toString(),TypeInference.type_inference(#<tuple(`kx,`factor,...rest)>)); |
| type_env.insert(v.toString(),TypeInference.type_inference(#<tuple(bag(`nv),nth(`v,1))>)); |
| int i = 2; |
| Tree reducer = b; |
| for ( Tree a: rest ) |
| reducer = subst(a,#<nth(`nv,`(i++))>,reducer); |
| Tree key = subst(w,v1,kx); |
| return simplify(#<join(lambda(`nv,nth(`nv,0)),`ky, |
| lambda(`v,cmap(lambda(`nv,cmap(lambda(`v2,bag(`reducer)), |
| nth(`vy,1))), |
| nth(`vx,0))), |
| cmap(lambda(`v1,bag(tuple(`key,`factor,...rest))),`X), |
| `Y)>); |
| // push projection before join: if the join reducer contains a function call |
| // that depends on the right input, push it to the right input |
| case join(`kx,lambda(`w,`ky), |
| lambda(`v,cmap(lambda(`v1,cmap(lambda(`v2,bag(`b)), |
| nth(`vy,1))), |
| nth(`vx,0))), |
| `X,`Y): |
| if (!vx.equals(v) || !vy.equals(v)) |
| fail; |
| Tree factor = factor_out_call(b,v2); |
| if (factor == null) |
| fail; |
| Tree nv = new_var(); |
| b = subst(factor,#<nth(`nv,1)>,b); |
| Trees rest = simple_accessors(v2,subst(factor,#<tuple()>,b)); |
| type_env.insert(nv.toString(),TypeInference.type_inference(#<tuple(`ky,`factor,...rest)>)); |
| type_env.insert(v.toString(),TypeInference.type_inference(#<tuple(nth(`v,0),bag(`nv))>)); |
| int i = 2; |
| Tree reducer = b; |
| Tree key = subst(w,v2,ky); |
| for ( Tree a: rest ) |
| reducer = subst(a,#<nth(`nv,`(i++))>,reducer); |
| return simplify(#<join(`kx,lambda(`nv,nth(`nv,0)), |
| lambda(`v,cmap(lambda(`v1,cmap(lambda(`nv,bag(`reducer)), |
| nth(`vy,1))), |
| nth(`vx,0))), |
| `X, |
| cmap(lambda(`v2,bag(tuple(`key,`factor,...rest))),`Y))>); |
| // if the reducer of a join generates pairs (k,v), where k is functional dependent |
| // on a join key, then the outer groupBy just groups the v values |
| case groupBy(join(lambda(`vx,`bx),`ky, |
| lambda(`v,cmap(lambda(`x,cmap(lambda(`y,bag(tuple(`ex,`br))), |
| nth(`v1,1))), |
| nth(`v2,0))), |
| `X,`Y)): |
| if (v1.equals(v) && v2.equals(v) && functional_dependent(subst(vx,x,bx),ex)) |
| return simplify(#<join(lambda(`vx,`bx),`ky, |
| lambda(`v,groupBy(cmap(lambda(`x,cmap(lambda(`y,bag(tuple(`ex,`br))), |
| nth(`v1,1))), |
| nth(`v2,0)))), |
| `X,`Y)>); |
| fail |
| // same for the right key |
| case groupBy(join(`kx,lambda(`vy,`by), |
| lambda(`v,cmap(lambda(`x,cmap(lambda(`y,bag(tuple(`ey,`br))), |
| nth(`v1,1))), |
| nth(`v2,0))), |
| `X,`Y)): |
| if (v1.equals(v) && v2.equals(v) && functional_dependent(subst(vy,y,by),ey)) |
| return simplify(#<join(`kx,lambda(`vy,`by), |
| lambda(`v,groupBy(cmap(lambda(`x,cmap(lambda(`y,bag(tuple(`ey,`br))), |
| nth(`v1,1))), |
| nth(`v2,0)))), |
| `X,`Y)>); |
| fail |
| // same for the left key, different nesting |
| case groupBy(join(lambda(`vx,`bx),`ky, |
| lambda(`v,cmap(lambda(`y,cmap(lambda(`x,bag(tuple(`ex,`br))), |
| nth(`v1,0))), |
| nth(`v2,1))), |
| `X,`Y)): |
| if (v1.equals(v) && v2.equals(v) && functional_dependent(subst(vx,x,bx),ex)) |
| return simplify(#<join(lambda(`vx,`bx),`ky, |
| lambda(`v,groupBy(cmap(lambda(`y,cmap(lambda(`x,bag(tuple(`ex,`br))), |
| nth(`v1,1))), |
| nth(`v2,0)))), |
| `X,`Y)>); |
| fail |
| // same for the right key, different nesting |
| case groupBy(join(`kx,lambda(`vy,`by), |
| lambda(`v,cmap(lambda(`y,cmap(lambda(`x,bag(tuple(`ey,`br))), |
| nth(`v1,0))), |
| nth(`v2,1))), |
| `X,`Y)): |
| if (v1.equals(v) && v2.equals(v) && functional_dependent(subst(vy,y,by),ey)) |
| return simplify(#<join(`kx,lambda(`vy,`by), |
| lambda(`v,groupBy(cmap(lambda(`y,cmap(lambda(`x,bag(tuple(`ey,`br))), |
| nth(`v1,0))), |
| nth(`v2,1)))), |
| `X,`Y)>); |
| fail |
| // same for the left key, right nested |
| case groupBy(join(lambda(`vx,`bx),`ky, |
| lambda(`v,cmap(lambda(`x,bag(tuple(`ex,`br))), |
| nth(`v1,0))), |
| `X,`Y)): |
| if (v1.equals(v) && functional_dependent(subst(vx,x,bx),ex)) |
| return simplify(#<join(lambda(`vx,`bx),`ky, |
| lambda(`v,groupBy(cmap(lambda(`x,bag(tuple(`ex,`br))), |
| nth(`v1,0)))), |
| `X,`Y)>); |
| fail |
| // same for the right key, left nested |
| case groupBy(join(`kx,lambda(`vy,`by), |
| lambda(`v,cmap(lambda(`y,bag(tuple(`ey,`br))), |
| nth(`v2,1))), |
| `X,`Y)): |
| if (v2.equals(v) && functional_dependent(subst(vy,y,by),ey)) |
| return simplify(#<join(`kx,lambda(`vy,`by), |
| lambda(`v,groupBy(cmap(lambda(`y,bag(tuple(`ey,`br))), |
| nth(`v2,1)))), |
| `X,`Y)>); |
| fail |
| // if we group-by the join key, then embed the group-by in the join reducer |
| // (redundant rule) |
| case groupBy(join(`kx,`ky,lambda(`v,cmap(lambda(`v1,cmap(lambda(`v2,bag(tuple(`k,`u))),`e1)),`e2)), |
| `X,`Y)): |
| if (((e1.equals(#<nth(`v,0)>) && e2.equals(#<nth(`v,1)>)) |
| || (e2.equals(#<nth(`v,0)>) && e1.equals(#<nth(`v,1)>))) |
| && (alpha_equivalent(kx,#<lambda(`v1,`k)>) |
| || alpha_equivalent(kx,#<lambda(`v2,`k)>))) |
| return simplify(#<join(`kx,`ky,lambda(`v,groupBy(cmap(lambda(`v1,cmap(lambda(`v2, |
| bag(tuple(`k,`u))),`e1)),`e2))), |
| `X,`Y)>); |
| fail |
| case groupBy(groupBy(`x)): |
| Tree nv = new_var(); |
| return simplify(#<cmap(lambda(`nv,bag(bag(`nv))),groupBy(`x))>); |
| case repeat(lambda(`v,`b),`s,...l): |
| repeat_variables = repeat_variables.cons(v); |
| return #<repeat(lambda(`v,`(simplify(b))),`(simplify(s)),...l)>; |
| case closure(lambda(`v,`b),`s,...l): |
| repeat_variables = repeat_variables.cons(v); |
| return #<closure(lambda(`v,`(simplify(b))),`(simplify(s)),...l)>; |
| case loop(lambda(tuple(...vs),`b),`s,`n): |
| repeat_variables = repeat_variables.append(vs); |
| return #<loop(lambda(tuple(...vs),`(simplify(b))),`(simplify(s)),`n)>; |
| case aggregate(`acc,`zero,`T()): |
| if (is_collection(T)) |
| return zero; |
| else fail |
| case aggregate(`acc,`zero,`T(`s)): |
| if (is_collection(T)) |
| return simplify(#<apply(`acc,tuple(`zero,`s))>); |
| else fail |
| case apply(lambda(`v,`b),`u): |
| if (!v.is_variable()) |
| fail; |
| return simplify(subst_var(v,u,b)); |
| case apply(function(tuple(...el),_,`b),`u): |
| int i = 0; |
| for ( Tree a: el ) |
| match a { |
| case `bind(`v,_): |
| b = subst(v,#<nth(`u,`(i++))>,b); |
| }; |
| return simplify(b); |
| case call(and,true,`u): return simplify(u); |
| case call(and,`u,true): return simplify(u); |
| case call(and,false,`u):return #<false>; |
| case call(and,`u,false): return #<false>; |
| case call(or,true,`u): return #<true>; |
| case call(or,`u,true): return #<true>; |
| case call(or,false,`u): return simplify(u); |
| case call(or,`u,false): return simplify(u); |
| case call(not,true): return #<false>; |
| case call(not,false): return #<true>; |
| case if(true,`e1,`e2): return simplify(e1); |
| case if(false,`e1,`e2): return simplify(e2); |
| case call(count,cmap(lambda(`v,`S(`x)),`u)): |
| if (is_collection(S)) |
| return simplify(#<call(count,`u)>); |
| else fail |
| case call(count,`groupBy(cmap(lambda(`v,`S(tuple(`x,`y))),`u))): |
| if (is_collection(S) && !y.equals(#<0>) && #[groupBy,orderBy].member(#<`groupBy>)) |
| return #<call(count,groupBy(cmap(lambda(`v,`S(tuple(`x,0))),`u)))>; |
| else fail |
| case call(count,`S(...r)): |
| if (is_collection(S)) |
| return #<typed(`(r.length()),long)>; |
| else fail |
| case call(`f,`S(`x)): |
| if (!is_collection(S)) |
| fail; |
| for ( Tree m: monoids ) |
| match m { |
| case `aggr(`mtp,`plus,`zero,`unit): |
| if (!aggr.equals(f.toString())) |
| continue; |
| if (TypeInference.unify(mtp,TypeInference.type_inference2(x)) != null) |
| return simplify(#<apply(`unit,`x)>); |
| }; |
| fail |
| case nth(tuple(...al),`n): |
| if (!n.is_long()) |
| fail; |
| int i = (int)n.longValue(); |
| if (i >= 0 && i < al.length()) |
| return simplify(al.nth(i)); |
| case project(record(...bl),`a): |
| for ( Tree b: bl ) |
| match b { |
| case bind(`v,`u): |
| if (v.equals(a)) |
| return simplify(u); |
| }; |
| case `f(...al): |
| Trees bl = #[]; |
| for ( Tree a: al ) |
| bl = bl.append(simplify(a)); |
| return #<`f(...bl)>; |
| }; |
| return e; |
| } |
| |
| /** Algebraic normalization (algebra to algebra) applied multiple times |
| * @param e algebraic expression |
| * @return an improved algebraic expression |
| */ |
| public static Tree simplify_all ( Tree e ) { |
| Tree ne = simplify(e); |
| if (e.equals(ne)) |
| return e; |
| else return simplify_all(ne); |
| } |
| } |