| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.mrql; |
| |
| import org.apache.mrql.gen.*; |
| |
| |
| /** normalize algebraic expressions to more efficient forms using heuristic rules */ |
| public class Normalization extends Translator { |
| |
| /** given that pattern=e, find the bindings of the pattern variables */ |
| static Trees bind_pattern ( Tree pattern, Tree e ) { |
| Trees args = #[]; |
| match pattern { |
| case tuple(...pl): |
| int i = 0; |
| for ( Tree p: pl ) { |
| args = args.append(bind_pattern(p,#<nth(`e,`i)>)); |
| i++; |
| } |
| case record(...bl): |
| Trees attrs = #[]; |
| for ( Tree b: bl ) |
| match b { |
| case bind(`n,`p): |
| args = args.append(bind_pattern(p,#<project(`e,`n)>)); |
| if (attrs.member(n)) |
| error("Duplicate record attribute name: "+n); |
| attrs = attrs.append(n); |
| }; |
| case typed(`p,`t): |
| args = bind_pattern(p,#<typed(`e,`t)>); |
| case list(...pl): |
| int i = 0; |
| for ( Tree p: pl ) { |
| args = args.append(bind_pattern(p,#<index(`e,`i)>)); |
| i++; |
| }; |
| args = args.append(#<call(eq,call(count,`e),`i)>); |
| case call(`c,...s): |
| Tree ci = data_constructors.lookup(c.toString()); |
| if (ci == null) |
| error("Undefined data constructor: "+c); |
| match ci { |
| case `dname(`n,`tp): |
| args = args.append(#<call(eq,union_tag(`e),`n)>); |
| args = args.append(bind_pattern(s.length() == 1 ? s.head() : #<tuple(...s)>, |
| #<typed(union_value(`e),`tp)>)); |
| }; |
| case any: ; |
| case `v: |
| if (!v.is_variable()) // constant in pattern |
| args = #[call(eq,`e,`v)]; |
| else if (st.lookup(v.toString()) != null // repeated pattern variable |
| && !(e.is_variable() && st.lookup(v.toString()).is_variable())) // exception |
| args = #[call(eq,`e,`(st.lookup(v.toString())))]; |
| else st.insert(v.toString(),e); // new pattern variable |
| }; |
| return args; |
| } |
| |
| private static Tree make_tuple ( Trees pl ) { |
| if (pl.length() == 1) |
| return pl.head(); |
| return #<tuple(...pl)>; |
| } |
| |
| /** remove group-bys and order-bys from the MRQL queries */ |
| static Tree remove_groupby ( Tree e ) { |
| Tree ret = #<error>; |
| match e { |
| case select(distinct,`u,from(...bl),where(`c),groupby(...gl),orderby(...ol)): |
| ret = #<select(none,tuple(`u,`u),from(...bl),where(`c),groupby(...gl),orderby(...ol))>; |
| ret = #<cmap(lambda(tuple(key,group),list(key)),groupBy(`ret))>; |
| return remove_groupby(ret); |
| case select(none,`u,from(...bl),where(`c),groupby(),orderby()): |
| return remove_groupby(#<select(`u,from(...bl),where(`c))>); |
| case select(none,`u,from(...bl),where(`c),groupby(...gl),orderby(`l,...ol)): |
| Tree tol = make_tuple(ol); |
| ret = #<cmap(lambda(tuple(key,group),group), |
| orderBy(select(none,tuple(`tol,`u), |
| from(...bl), |
| where(`c),groupby(...gl),orderby())))>; |
| return (l.equals(#<none>)) |
| ? remove_groupby(ret) |
| : #<range(`(remove_groupby(ret)),0,`l)>; |
| case select(none,`u,from(...bl),where(`c),groupby(`h,...gl),orderby()): |
| Trees pl = #[]; |
| Trees ul = #[]; |
| Trees ql = #[]; |
| for ( Tree b: bl ) |
| match b { |
| case bind(`p,`d): |
| pl = pl.append(p); |
| }; |
| Trees pvs = #[]; |
| for ( Tree g: gl ) |
| match g { |
| case bind(`p,`d): |
| ql = ql.append(p); |
| ul = ul.append(d); |
| pvs = pvs.append(pattern_variables(p)); |
| }; |
| Tree tql = make_tuple(ql); |
| Tree tul = make_tuple(ul); |
| Tree tpl = make_tuple(pl); |
| Trees xl = #[]; |
| Trees partl = #[]; |
| for ( Tree x: pattern_variables(#<tuple(...pl)>) ) |
| if (!pvs.member(x)) { |
| partl = partl.append(#<bind(`x,`x)>); |
| match rename(#<select(`x,from(bind(`tpl,group)),where(true))>) { |
| case select(`hd,`binds,...): |
| xl = xl.append(#<bind(`x,bag(select(`hd,`binds,where(true))))>); |
| } |
| }; |
| match rename(#<select(record(...partl),from(bind(`tpl,group)),where(true))>) { |
| case select(`hd,`binds,...): |
| xl = xl.cons(#<bind(partition,bag(select(`hd,`binds,where(true))))>); |
| } |
| tpl = subst(#<any>,#<0>,tpl); |
| ret = #<select(`u,from(bind(tuple(`tql,group), |
| groupBy(select(tuple(`tul,`tpl),from(...bl),where(`c)))), |
| ...xl),where(`h))>; |
| return remove_groupby(ret); |
| case intersect(`x,`y): |
| return remove_groupby(#<select(x,from(bind(x,`x),bind(y,`y)), |
| where(call(eq,x,y)))>); |
| case except(`x,`y): |
| return remove_groupby(#<select(x,from(bind(x,`x)), |
| where(call(not,call(exists,select(y,from(bind(y,`y)), |
| where(call(eq,x,y)))))))>); |
| case member(`x,`y): |
| return remove_groupby(#<call(exists,select(y,from(bind(y,`y)), |
| where(call(eq,y,`x))))>); |
| case call(gen,`min,`max,`size): |
| return #<gen(`(remove_groupby(min)),`(remove_groupby(max)),`(remove_groupby(size)))>; |
| case call(avg,`s): |
| return remove_groupby(#<call(avg_value,call(avg_aggr,`s))>); |
| case call(`f,...al): |
| Tree macro = global_macros.lookup(f.toString()); |
| if (macro == null) |
| fail; |
| match macro { |
| case macro(params(...pl),`body): |
| Tree b = rename(remove_groupby(body)); |
| if (pl.length() != al.length()) |
| fail; |
| for ( ; !pl.is_empty(); pl = pl.tail(), al = al.tail() ) |
| b = subst(pl.head(),remove_groupby(al.head()),b); |
| return b; |
| } |
| case call(`f,...al): |
| if (#[cmap,join,mapReduce,mapReduce2,groupBy,orderBy,tuple,bag,list,set].member(f)) |
| return remove_groupby(#<`(f.toString())(...al)>); |
| else fail |
| case project(`x,`a): |
| return #<project(`(remove_groupby(x)),`a)>; |
| case `f(...al): |
| Trees bl = #[]; |
| for ( Tree a: al ) |
| bl = bl.append(remove_groupby(a)); |
| return #<`f(...bl)>; |
| case `v: |
| if (v.is_variable()) { |
| ret = global_vars.lookup(v.toString()); |
| if (ret == null) |
| return v; |
| else if (!v.equals(ret)) |
| return remove_groupby(ret); |
| } |
| }; |
| return e; |
| } |
| |
| private static Tree make_and ( Trees tests ) { |
| if (tests.is_empty()) |
| return #<true>; |
| Tree e = tests.head(); |
| for ( Tree t: tests.tail() ) |
| e = #<call(and,`e,`t)>; |
| return e; |
| } |
| |
| private static Trees rename_list ( Trees al ) { |
| Trees bl = #[]; |
| for ( Tree a: al ) |
| bl = bl.append(rename(a)); |
| return bl; |
| } |
| |
| /** compile away patterns and rename local variables of an MRQL expression e with unique names */ |
| static Tree rename ( Tree e ) { |
| Tree ret = #<error>; |
| match e { |
| case `v: |
| if (!v.is_variable()) |
| fail; |
| ret = st.lookup(v.toString()); |
| if (ret==null) |
| return v; |
| else return ret; |
| case select(`u,from(...bl),where(`c)): |
| st.begin_scope(); |
| Trees binds = #[]; |
| Trees tests = #[]; |
| for ( Tree b: bl ) |
| match b { |
| case bind(`p,`d): |
| Tree x = new_var(); |
| binds = binds.append(#<bind(`x,`(rename(d)))>); |
| tests = tests.append(bind_pattern(p,x)); |
| }; |
| c = make_and(tests.cons(c)); |
| ret = #<select(`(rename(u)), |
| from(...binds), |
| where(`(rename(c))))>; |
| st.end_scope(); |
| return ret; |
| case lambda(`p,`b): |
| st.begin_scope(); |
| Tree nv = new_var(); |
| if (!bind_pattern(p,nv).is_empty()) |
| error("Lambda patterns must be irrefutable: "+print_query(e)); |
| ret = #<lambda(`nv,`(rename(b)))>; |
| st.end_scope(); |
| return ret; |
| case function(tuple(...params),`outp,`body): |
| st.begin_scope(); |
| Trees ps = #[]; |
| Trees vs = #[]; |
| for ( Tree p: params ) |
| match p { |
| case `bind(`v,`tp): |
| Tree nv = new_var(); |
| if (vs.member(v)) |
| error("Duplicate function parameters: "+print_query(e)); |
| vs = vs.append(v); |
| ps = ps.append(#<`bind(`nv,`tp)>); |
| st.insert(v.toString(),nv); |
| }; |
| ret = #<function(tuple(...ps),`outp,`(rename(body)))>; |
| st.end_scope(); |
| return ret; |
| case let(`p,`u,`b): |
| Tree ne = rename(u); |
| st.begin_scope(); |
| Tree nv = new_var(); |
| if (!bind_pattern(p,nv).is_empty()) |
| error("Let patterns must be irrefutable: "+print_query(e)); |
| ret = #<let(`nv,`ne,`(rename(b)))>; |
| st.end_scope(); |
| return ret; |
| case case(`u,...cs): |
| Trees rs = cs.reverse(); |
| Tree nu = rename(u); |
| match rs.head() { |
| case case(`p,`b): |
| Trees conds = bind_pattern(p,nu); |
| if (!conds.is_empty()) |
| error("Non-exhaustive case "+print_query(p)+" in "+print_query(e)); |
| ret = b; |
| }; |
| for ( Tree c: rs.tail() ) |
| match c { |
| case case(`p,`b): |
| Trees conds = bind_pattern(p,nu); |
| if (!conds.is_empty()) |
| ret = #<if(`(make_and(conds)),`b,`ret)>; |
| else error("Unreachable case "+print_query(p)+" in "+print_query(e)); |
| }; |
| return rename(ret); |
| case project(`u,`a): |
| return #<project(`(rename(u)),`a)>; |
| case bind(`a,`u): |
| return #<bind(`a,`(rename(u)))>; |
| case loop(lambda(tuple(...vs),`b),`s,`n): |
| return #<loop(lambda(tuple(...vs),`(rename(b))),`(rename(s)),`n)>; |
| case `f(...al): |
| Trees bl = rename_list(al); |
| return #<`f(...bl)>; |
| }; |
| return e; |
| } |
| |
| private static Trees has_existential ( Tree e ) { |
| match e { |
| case call(and(`x,`y)): |
| Trees xs = has_existential(x); |
| Trees ys = has_existential(y); |
| return #[call(and(`(xs.head()),`(ys.head())),...(xs.tail()),...(ys.tail()))]; |
| case call(exists,select(...)): |
| return #[true,`e]; |
| case call(not,call(all,select(...l))): |
| return #[true,call(exists,select(...l))]; |
| }; |
| return #[`e]; |
| } |
| |
| /** normalize algebraic expressions to more efficient forms using heuristic rules */ |
| public static Tree normalize ( Tree e ) { |
| match e { |
| case select(`u,from(),where(true)): |
| return normalize(#<bag(`u)>); |
| case select(`u,from(),where(`p)): |
| return normalize(#<if(`p,bag(`u),bag())>); |
| case select(`u,from(bind(`v,`d)),where(true)): |
| if (u.equals(v)) |
| return normalize(d); |
| else fail |
| case select(`u,from(...bl,bind(`v,select(`iu,from(...ibl),where(`ic))),...al),where(`c)): |
| return normalize(#<select(`u,from(...bl,...ibl,bind(`v,bag(`iu)),...al), |
| where(call(and,`c,`ic)))>); |
| case select(`u,from(...bl,bind(`v,bag(`d)),...al),`c): |
| if (!is_pure(d) && occurences(v,#<f(`c,`u,...al)>) > 1) // duplicated side-effects |
| fail; |
| return normalize(#<select(`(subst(v,d,u)), |
| from(...bl,...(subst_list(v,d,al))), |
| `(subst(v,d,c)))>); |
| case select(`u,from(...bl,bind(`v,call(plus,`X,`Y)),...al),where(`c)): |
| return normalize(#<call(plus,select(`u,from(...bl,bind(`v,`X),...al),where(`c)), |
| select(`u,from(...bl,bind(`v,`Y),...al),where(`c)))>); |
| case select(`u,from(...bl),where(`c)): |
| Trees es = has_existential(c); |
| if (es.length() <= 1) |
| fail; |
| Trees binds = bl; |
| Trees preds = #[`(es.head())]; |
| for ( Tree x: es.tail() ) |
| match x { |
| case call(exists,select(`p,from(...bl2),where(`c2))): |
| preds = preds.cons(p).cons(c2); |
| binds = binds.append(bl2); |
| }; |
| return normalize(#<select(`u,from(...binds),where(`(make_and(preds))))>); |
| case let_bind(`v,`x,`y): |
| return #<let(`v,`(normalize(x)),`(normalize(y)))>; |
| case call(eq,tuple(...l),`x): |
| Tree pl = #<true>; |
| int i = 0; |
| for ( Tree y: l ) { |
| pl = #<call(and,`pl,call(eq,`y,nth(`x,`i)))>; |
| i++; |
| }; |
| return normalize(pl); |
| case call(eq,`x,tuple(...l)): |
| Tree pl = #<true>; |
| int i = 0; |
| for (Tree y: l) { |
| pl = #<call(and,`pl,call(eq,nth(`x,`i),`y))>; |
| i++; |
| }; |
| return normalize(pl); |
| case call(and,true,`u): return normalize(u); |
| case call(and,`u,true): return normalize(u); |
| case call(and,false,`u): return #<false>; |
| case call(and,`u,false): return #<false>; |
| case call(or,true,`u): return #<true>; |
| case call(or,`u,true): return #<true>; |
| case call(or,false,`u): return normalize(u); |
| case call(or,`u,false): return normalize(u); |
| case call(not,true): return #<false>; |
| case call(not,false): return #<true>; |
| case if(true,`e1,`e2): return normalize(e1); |
| case if(false,`e1,`e2): return normalize(e2); |
| case nth(tuple(...al),`n): |
| if (!n.is_long()) |
| fail; |
| int i = (int)n.longValue(); |
| if ( i >= 0 && i < al.length() ) |
| return normalize(al.nth(i)); |
| case project(record(...bl),`a): |
| for ( Tree b: bl ) |
| match b { |
| case bind(`v,`u): if (v.equals(a)) return normalize(u); |
| }; |
| error("Wrong projection: "+print_query(e)); |
| case `f(...al): |
| Trees bl = #[]; |
| for ( Tree a: al ) |
| bl = bl.append(normalize(a)); |
| return #<`f(...bl)>; |
| }; |
| return e; |
| } |
| |
| /** normalize algebraic expressions to more efficient forms using heuristic rules */ |
| public static Tree normalize_all ( Tree e ) { |
| Tree ne = normalize(e); |
| if (e.equals(ne)) |
| return e; |
| else return normalize(ne); |
| } |
| } |