| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.mrql; |
| |
| import org.apache.mrql.gen.*; |
| |
| /** |
| * Optimize MRQL algebra expressions using normalization rules and heuristic optimizations |
| */ |
| public class AlgebraicOptimization extends Simplification { |
| |
| /** Is this a group-by operation?*/ |
| private static boolean is_groupBy ( Tree e ) { |
| match e { |
| case groupBy(...): return true; |
| case orderBy(...): return true; |
| }; |
| return false; |
| } |
| |
| /** |
| * algebraic optimization (algebra to algebra) |
| * @param e the algebraic form to be optimized |
| * @return the optimized form |
| */ |
| public static Tree translate ( Tree e ) { |
| match e { |
| // convert self-joins to single-source mapreduce; Hadoop doesn't like self-joins anyway |
| case mapReduce2(lambda(`vx,`bx),lambda(`vy,`by),lambda(`s,`f),`X,`Y,`o): |
| if (!alpha_equivalent(X,Y) || !Config.selfJoinOpt) |
| fail; |
| Tree ny = subst(vy,vx,by); |
| Tree tx = null; |
| Tree ty = null; |
| match TypeInference.type_inference(bx) { |
| case _(tuple(_,`t)): tx = t; |
| }; |
| match TypeInference.type_inference(by) { |
| case _(tuple(_,`t)): ty = t; |
| }; |
| Tree b = subst(s,#<tuple(cmap(lambda(tuple(n,v1,v2), |
| if(call(eq,n,1),bag(v1),bag())),s), |
| cmap(lambda(tuple(n,v1,v2), |
| if(call(eq,n,2),bag(v2),bag())),s))>, |
| f); |
| Tree res = #<mapReduce(lambda(`vx,call(plus, |
| cmap(lambda(x,bag(tuple(nth(x,0),tuple(1,nth(x,1),typed(null,`ty))))),`bx), |
| cmap(lambda(y,bag(tuple(nth(y,0),tuple(2,typed(null,`tx),nth(y,1))))),`ny))), |
| lambda(tuple(k,s),`b), |
| `X,`o)>; |
| res = simplify_all(rename(res)); |
| TypeInference.type_inference(res); |
| return translate(res); |
| case mapReduce2(`mx,`my,`r,cmap(lambda(`v,`b),`X),`Y,`o): |
| match X { |
| case groupBy(...): ; |
| case _: |
| Tree nmx = simplify(#<cmap(`mx,`b)>); |
| return translate(#<mapReduce2(lambda(`v,`nmx),`my,`r,`X,`Y,`o)>); |
| }; |
| fail |
| case mapReduce2(`mx,`my,`r,`X,cmap(lambda(`v,`b),`Y),`o): |
| match Y { |
| case groupBy(...): ; |
| case _: |
| Tree nmy = simplify(#<cmap(`my,`b)>); |
| return translate(#<mapReduce2(`mx,lambda(`v,`nmy),`r,`X,`Y,`o)>); |
| }; |
| fail |
| case crossProduct(`mx,`my,`r,cmap(lambda(`vx,`bx),`X),cmap(lambda(`vy,`by),`Y)): |
| return translate(#<crossProduct(lambda(`vx,cmap(`mx,`bx)), |
| lambda(`vy,cmap(`my,`by)),`r,`X,`Y)>); |
| case crossProduct(`mx,`my,`r,cmap(lambda(`v,`b),`X),`Y): |
| return translate(#<crossProduct(lambda(`v,cmap(`mx,`b)),`my,`r,`X,`Y)>); |
| case crossProduct(`mx,`my,`r,`X,cmap(lambda(`v,`b),`Y)): |
| return translate(#<crossProduct(`mx,lambda(`v,cmap(`my,`b)),`r,`X,`Y)>); |
| case cmap(`m,crossProduct(`mx,`my,lambda(`v,`b),`X,`Y)): |
| return translate(#<crossProduct(`mx,`my,lambda(`v,cmap(`m,`b)),`X,`Y)>); |
| case cmap(`r,`groupBy1(cmap(`m,`groupBy2(`s)))): |
| if (! #[groupBy,orderBy].member(#<`groupBy1>) |
| || ! #[groupBy,orderBy].member(#<`groupBy2>)) |
| fail; |
| return #<mapReduce(`(identity()), |
| `(translate(r)), |
| `(translate(#<cmap(`m,`groupBy2(`s))>)), |
| `((#<`groupBy1>.equals(#<orderBy>)) ? #<true> : #<false>))>; |
| case cmap(`r,`groupBy(cmap(`m,`s))): |
| if (! #[groupBy,orderBy].member(#<`groupBy>)) |
| fail; |
| return #<mapReduce(`(translate(m)), |
| `(translate(r)), |
| `(translate(s)), |
| `((#<`groupBy>.equals(#<orderBy>)) ? #<true> : #<false>))>; |
| case `groupBy(cmap(`m,groupBy(`s))): |
| if (! #[groupBy,orderBy].member(#<`groupBy>)) |
| fail; |
| return #<mapReduce(`(identity()), |
| `(identity()), |
| `(translate(#<cmap(`m,groupBy(`s))>)), |
| `((#<`groupBy>.equals(#<orderBy>)) ? #<true> : #<false>))>; |
| case `groupBy(cmap(`m,`s)): |
| if (! #[groupBy,orderBy].member(#<`groupBy>)) |
| fail; |
| return #<mapReduce(`(translate(m)), |
| `(identity()), |
| `(translate(s)), |
| `((#<`groupBy>.equals(#<orderBy>)) ? #<true> : #<false>))>; |
| case cmap(`r,`groupBy(`s)): |
| if (! #[groupBy,orderBy].member(#<`groupBy>)) |
| fail; |
| return #<mapReduce(`(identity()), |
| `(translate(r)), |
| `(translate(s)), |
| `((#<`groupBy>.equals(#<orderBy>)) ? #<true> : #<false>))>; |
| case `groupBy(`s): |
| if (! #[groupBy,orderBy].member(#<`groupBy>)) |
| fail; |
| return #<mapReduce(`(identity()), |
| `(identity()), |
| `(translate(s)), |
| `((#<`groupBy>.equals(#<orderBy>)) ? #<true> : #<false>))>; |
| case cmap(`m,`s): |
| return #<cmap(`(translate(m)), |
| `(translate(s)))>; |
| // convert self-joins to single-source mapreduce; Hadoop doesn't like self-joins anyway |
| case join(lambda(`vx,`bx),lambda(`vy,`by),lambda(`s,`f),`x,`y): |
| if (!x.equals(y) || !Config.selfJoinOpt) |
| fail; |
| Tree ny = subst(vy,vx,by); |
| Tree b = subst(s, |
| #<tuple(cmap(lambda(tuple(n,v), |
| if(call(eq,n,1),bag(v),bag())),s), |
| cmap(lambda(tuple(n,v), |
| if(call(eq,n,2),bag(v),bag())),s))>, |
| f); |
| Tree res = #<mapReduce(lambda(`vx,bag(tuple(`bx,tuple(1,`vx)), |
| tuple(`ny,tuple(2,`vx)))), |
| lambda(tuple(k,s),`b),`x,false)>; |
| res = simplify_all(rename(res)); |
| TypeInference.type_inference(res); |
| return translate(res); |
| case join(lambda(`vx,`bx),lambda(`vy,`by),`f,`x,`y): |
| return translate(#<mapReduce2(lambda(`vx,bag(tuple(`bx,`vx))), |
| lambda(`vy,bag(tuple(`by,`vy))), |
| `f,`x,`y,false)>); |
| case nth(`x,`n): |
| match TypeInference.type_inference2(x) { |
| case `S(tuple(...bl)): |
| if (!is_collection(S)) |
| fail; |
| Tree nv = new_var(); |
| type_env.insert(nv.toString(),bl.nth((int)n.longValue())); |
| return translate(#<cmap(lambda(`nv,`S(nth(`nv,`n))),`x)>); |
| }; |
| fail |
| case project(`x,`a): |
| match TypeInference.type_inference2(x) { |
| case `S(record(...bl)): |
| if (!is_collection(S)) |
| fail; |
| for ( Tree b: bl ) |
| match b { |
| case bind(`c,_): |
| if (!a.equals(c)) |
| fail; |
| Tree nv = new_var(); |
| type_env.insert(nv.toString(),#<record(...bl)>); |
| return translate(#<cmap(lambda(`nv,`S(project(`nv,`a))),`x)>); |
| }; |
| }; |
| fail |
| case provenance(`x,...s): |
| return #<provenance(`(translate(x)),...s)>; |
| case `f(...al): |
| Trees bl = #[]; |
| for ( Tree a: al ) |
| bl = bl.append(translate(a)); |
| return #<`f(...bl)>; |
| }; |
| return e; |
| } |
| |
| /** apply algebraic optimizations multiple times until no change */ |
| public static Tree translate_all ( Tree e ) { |
| Tree ne = translate(e); |
| if (e.equals(ne)) |
| return e; |
| else return translate_all(ne); |
| } |
| |
| /** |
| * does a form contain a bulk plan that doesn't refer to a given variable? |
| * @param e the form |
| * @param v the given variable |
| * @return true if e contains a bulk plan that doesn't refer to v |
| */ |
| private static boolean contains_plan ( Tree e, Tree v ) { |
| match e { |
| case lambda(`x,`u): return false; |
| case let(...): return false; |
| case Let(...): return false; |
| case `f(...as): |
| if (plan_names.member(#<`f>) && !free_variables(e,#[]).member(v)) |
| return true; |
| for (Tree a: as) |
| if (contains_plan(a,v)) |
| return true; |
| return false; |
| }; |
| return false; |
| } |
| |
| /** |
| * extract the common factors (common sub-expressions) from a form |
| * that do not refer to a given variable |
| * @param e the form |
| * @param v the given variable |
| * @return the list of common factors |
| */ |
| private static Trees common_factors ( Tree e, Tree v ) { |
| match e { |
| case lambda(`x,`u): return #[]; |
| case let(...): return #[]; |
| case Let(...): return #[]; |
| case `f(...as): |
| if (!contains_plan(e,v)) |
| fail; |
| if (plan_names.member(#<`f>) && !free_variables(e,#[]).member(v)) |
| return #[`e]; |
| Trees bs = #[]; |
| for ( Tree a: as ) |
| bs = bs.append(common_factors(a,v)); |
| return bs; |
| }; |
| return #[]; |
| } |
| |
| /** |
| * if a term is used multiple times in a query, factor it out using let-expressions |
| * @param e the expression to be factored-out |
| * @return the factored-out expression |
| */ |
| public static Tree common_factoring ( Tree e ) { |
| match e { |
| case `f(...as): |
| if (!plan_names.member(#<`f>)) |
| fail; |
| Trees bs = #[]; |
| Trees binds = #[]; |
| for ( Tree a: as ) |
| match a { |
| case lambda(`v,`u): |
| if (!contains_plan(u,v)) |
| fail; |
| Trees gs = common_factors(u,v); |
| Tree nb = u; |
| for ( Tree g: gs) { |
| Tree nv = new_var(); |
| nb = subst(g,nv,nb); |
| binds = binds.append(#<bind(`nv,`g)>); |
| }; |
| bs = bs.append(#<lambda(`v,`(common_factoring(nb)))>); |
| case _: bs = bs.append(common_factoring(a)); |
| }; |
| Tree res = #<`f(...bs)>; |
| for ( Tree bind: binds ) |
| match bind { |
| case bind(`v,`x): |
| res = #<let(`v,`x,`res)>; |
| }; |
| return res; |
| case `f(...as): |
| Trees bs = #[]; |
| for ( Tree a: as ) |
| bs = bs.append(common_factoring(a)); |
| return #<`f(...bs)>; |
| }; |
| return e; |
| } |
| } |