blob: 691aac7edd936993fd4d90f9ad9d5c6e99794cba [file] [log] [blame]
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.
-module(mango_selector).
-export([
normalize/1,
match/2
]).
-include_lib("couch/include/couch_db.hrl").
-include("mango.hrl").
% Validate and normalize each operator. This translates
% every selector operator into a consistent version that
% we can then rely on for all other selector functions.
% See the definition of each step below for more information
% on what each one does.
normalize({[]}) ->
{[]};
normalize(Selector) ->
Steps = [
fun norm_ops/1,
fun norm_fields/1,
fun norm_negations/1
],
{NProps} = lists:foldl(fun(Step, Sel) -> Step(Sel) end, Selector, Steps),
FieldNames = [Name || {Name, _} <- NProps],
case lists:member(<<>>, FieldNames) of
true ->
?MANGO_ERROR({invalid_selector, missing_field_name});
false ->
ok
end,
{NProps}.
% Match a selector against a #doc{} or EJSON value.
% This assumes that the Selector has been normalized.
% Returns true or false.
% An empty selector matches any value.
match({[]}, _) ->
true;
match(Selector, #doc{body=Body}) ->
match(Selector, Body, fun mango_json:cmp/2);
match(Selector, {Props}) ->
match(Selector, {Props}, fun mango_json:cmp/2).
% Convert each operator into a normalized version as well
% as convert an implict operators into their explicit
% versions.
norm_ops({[{<<"$and">>, Args}]}) when is_list(Args) ->
{[{<<"$and">>, [norm_ops(A) || A <- Args]}]};
norm_ops({[{<<"$and">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$and', Arg});
norm_ops({[{<<"$or">>, Args}]}) when is_list(Args) ->
{[{<<"$or">>, [norm_ops(A) || A <- Args]}]};
norm_ops({[{<<"$or">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$or', Arg});
norm_ops({[{<<"$not">>, {_}=Arg}]}) ->
{[{<<"$not">>, norm_ops(Arg)}]};
norm_ops({[{<<"$not">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$not', Arg});
norm_ops({[{<<"$nor">>, Args}]}) when is_list(Args) ->
{[{<<"$nor">>, [norm_ops(A) || A <- Args]}]};
norm_ops({[{<<"$nor">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$nor', Arg});
norm_ops({[{<<"$in">>, Args}]} = Cond) when is_list(Args) ->
Cond;
norm_ops({[{<<"$in">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$in', Arg});
norm_ops({[{<<"$nin">>, Args}]} = Cond) when is_list(Args) ->
Cond;
norm_ops({[{<<"$nin">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$nin', Arg});
norm_ops({[{<<"$exists">>, Arg}]} = Cond) when is_boolean(Arg) ->
Cond;
norm_ops({[{<<"$exists">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$exists', Arg});
norm_ops({[{<<"$type">>, Arg}]} = Cond) when is_binary(Arg) ->
Cond;
norm_ops({[{<<"$type">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$type', Arg});
norm_ops({[{<<"$mod">>, [D, R]}]} = Cond) when is_integer(D), is_integer(R) ->
Cond;
norm_ops({[{<<"$mod">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$mod', Arg});
norm_ops({[{<<"$regex">>, Regex}]} = Cond) when is_binary(Regex) ->
case re:compile(Regex) of
{ok, _} ->
Cond;
_ ->
?MANGO_ERROR({bad_arg, '$regex', Regex})
end;
norm_ops({[{<<"$all">>, Args}]}) when is_list(Args) ->
{[{<<"$all">>, Args}]};
norm_ops({[{<<"$all">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$all', Arg});
norm_ops({[{<<"$elemMatch">>, {_}=Arg}]}) ->
{[{<<"$elemMatch">>, norm_ops(Arg)}]};
norm_ops({[{<<"$elemMatch">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$elemMatch', Arg});
norm_ops({[{<<"$allMatch">>, {_}=Arg}]}) ->
{[{<<"$allMatch">>, norm_ops(Arg)}]};
norm_ops({[{<<"$allMatch">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$allMatch', Arg});
norm_ops({[{<<"$size">>, Arg}]}) when is_integer(Arg), Arg >= 0 ->
{[{<<"$size">>, Arg}]};
norm_ops({[{<<"$size">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$size', Arg});
norm_ops({[{<<"$text">>, Arg}]}) when is_binary(Arg); is_number(Arg);
is_boolean(Arg) ->
{[{<<"$default">>, {[{<<"$text">>, Arg}]}}]};
norm_ops({[{<<"$text">>, Arg}]}) ->
?MANGO_ERROR({bad_arg, '$text', Arg});
% Not technically an operator but we pass it through here
% so that this function accepts its own output. This exists
% so that $text can have a field name value which simplifies
% logic elsewhere.
norm_ops({[{<<"$default">>, _}]} = Selector) ->
Selector;
% Terminals where we can't perform any validation
% on the value because any value is acceptable.
norm_ops({[{<<"$lt">>, _}]} = Cond) ->
Cond;
norm_ops({[{<<"$lte">>, _}]} = Cond) ->
Cond;
norm_ops({[{<<"$eq">>, _}]} = Cond) ->
Cond;
norm_ops({[{<<"$ne">>, _}]} = Cond) ->
Cond;
norm_ops({[{<<"$gte">>, _}]} = Cond) ->
Cond;
norm_ops({[{<<"$gt">>, _}]} = Cond) ->
Cond;
% Known but unsupported operators
norm_ops({[{<<"$where">>, _}]}) ->
?MANGO_ERROR({not_supported, '$where'});
norm_ops({[{<<"$geoWithin">>, _}]}) ->
?MANGO_ERROR({not_supported, '$geoWithin'});
norm_ops({[{<<"$geoIntersects">>, _}]}) ->
?MANGO_ERROR({not_supported, '$geoIntersects'});
norm_ops({[{<<"$near">>, _}]}) ->
?MANGO_ERROR({not_supported, '$near'});
norm_ops({[{<<"$nearSphere">>, _}]}) ->
?MANGO_ERROR({not_supported, '$nearSphere'});
% Unknown operator
norm_ops({[{<<"$", _/binary>>=Op, _}]}) ->
?MANGO_ERROR({invalid_operator, Op});
% A {Field: Cond} pair
norm_ops({[{Field, Cond}]}) ->
{[{Field, norm_ops(Cond)}]};
% An implicit $and
norm_ops({Props}) when length(Props) > 1 ->
{[{<<"$and">>, [norm_ops({[P]}) || P <- Props]}]};
% A bare value condition means equality
norm_ops(Value) ->
{[{<<"$eq">>, Value}]}.
% This takes a selector and normalizes all of the
% field names as far as possible. For instance:
%
% Unnormalized:
% {foo: {$and: [{$gt: 5}, {$lt: 10}]}}
%
% Normalized:
% {$and: [{foo: {$gt: 5}}, {foo: {$lt: 10}}]}
%
% And another example:
%
% Unnormalized:
% {foo: {bar: {$gt: 10}}}
%
% Normalized:
% {"foo.bar": {$gt: 10}}
%
% Its important to note that we can only normalize
% field names like this through boolean operators where
% we can gaurantee commutativity. We can't necessarily
% do the same through the '$elemMatch' or '$allMatch'
% operators but we can apply the same algorithm to its
% arguments.
norm_fields({[]}) ->
{[]};
norm_fields(Selector) ->
norm_fields(Selector, <<>>).
% Operators where we can push the field names further
% down the operator tree
norm_fields({[{<<"$and">>, Args}]}, Path) ->
{[{<<"$and">>, [norm_fields(A, Path) || A <- Args]}]};
norm_fields({[{<<"$or">>, Args}]}, Path) ->
{[{<<"$or">>, [norm_fields(A, Path) || A <- Args]}]};
norm_fields({[{<<"$not">>, Arg}]}, Path) ->
{[{<<"$not">>, norm_fields(Arg, Path)}]};
norm_fields({[{<<"$nor">>, Args}]}, Path) ->
{[{<<"$nor">>, [norm_fields(A, Path) || A <- Args]}]};
% Fields where we can normalize fields in the
% operator arguments independently.
norm_fields({[{<<"$elemMatch">>, Arg}]}, Path) ->
Cond = {[{<<"$elemMatch">>, norm_fields(Arg)}]},
{[{Path, Cond}]};
norm_fields({[{<<"$allMatch">>, Arg}]}, Path) ->
Cond = {[{<<"$allMatch">>, norm_fields(Arg)}]},
{[{Path, Cond}]};
% The text operator operates against the internal
% $default field. This also asserts that the $default
% field is at the root as well as that it only has
% a $text operator applied.
norm_fields({[{<<"$default">>, {[{<<"$text">>, _Arg}]}}]}=Sel, <<>>) ->
Sel;
norm_fields({[{<<"$default">>, _}]} = Selector, _) ->
?MANGO_ERROR({bad_field, Selector});
% Any other operator is a terminal below which no
% field names should exist. Set the path to this
% terminal and return it.
norm_fields({[{<<"$", _/binary>>, _}]} = Cond, Path) ->
{[{Path, Cond}]};
% We've found a field name. Append it to the path
% and skip this node as we unroll the stack as
% the full path will be further down the branch.
norm_fields({[{Field, Cond}]}, <<>>) ->
% Don't include the '.' for the first element of
% the path.
norm_fields(Cond, Field);
norm_fields({[{Field, Cond}]}, Path) ->
norm_fields(Cond, <<Path/binary, ".", Field/binary>>);
% An empty selector
norm_fields({[]}, Path) ->
{Path, {[]}};
% Else we have an invalid selector
norm_fields(BadSelector, _) ->
?MANGO_ERROR({bad_field, BadSelector}).
% Take all the negation operators and move the logic
% as far down the branch as possible. This does things
% like:
%
% Unnormalized:
% {$not: {foo: {$gt: 10}}}
%
% Normalized:
% {foo: {$lte: 10}}
%
% And we also apply DeMorgan's laws
%
% Unnormalized:
% {$not: {$and: [{foo: {$gt: 10}}, {foo: {$lt: 5}}]}}
%
% Normalized:
% {$or: [{foo: {$lte: 10}}, {foo: {$gte: 5}}]}
%
% This logic is important because we can't "see" through
% a '$not' operator to be able to locate indices that may
% service a specific query. Though if we move the negations
% down to the terminals we may be able to negate specific
% operators which allows us to find usable indices.
% Operators that cause a negation
norm_negations({[{<<"$not">>, Arg}]}) ->
negate(Arg);
norm_negations({[{<<"$nor">>, Args}]}) ->
{[{<<"$and">>, [negate(A) || A <- Args]}]};
% Operators that we merely seek through as we look for
% negations.
norm_negations({[{<<"$and">>, Args}]}) ->
{[{<<"$and">>, [norm_negations(A) || A <- Args]}]};
norm_negations({[{<<"$or">>, Args}]}) ->
{[{<<"$or">>, [norm_negations(A) || A <- Args]}]};
norm_negations({[{<<"$elemMatch">>, Arg}]}) ->
{[{<<"$elemMatch">>, norm_negations(Arg)}]};
norm_negations({[{<<"$allMatch">>, Arg}]}) ->
{[{<<"$allMatch">>, norm_negations(Arg)}]};
% All other conditions can't introduce negations anywhere
% further down the operator tree.
norm_negations(Cond) ->
Cond.
% Actually negate an expression. Make sure and read up
% on DeMorgan's laws if you're trying to read this, but
% in a nutshell:
%
% NOT(a AND b) == NOT(a) OR NOT(b)
% NOT(a OR b) == NOT(a) AND NOT(b)
%
% Also notice that if a negation hits another negation
% operator that we just nullify the combination. Its
% possible that below the nullification we have more
% negations so we have to recurse back to norm_negations/1.
% Negating negation, nullify but recurse to
% norm_negations/1
negate({[{<<"$not">>, Arg}]}) ->
norm_negations(Arg);
negate({[{<<"$nor">>, Args}]}) ->
{[{<<"$or">>, [norm_negations(A) || A <- Args]}]};
% DeMorgan Negations
negate({[{<<"$and">>, Args}]}) ->
{[{<<"$or">>, [negate(A) || A <- Args]}]};
negate({[{<<"$or">>, Args}]}) ->
{[{<<"$and">>, [negate(A) || A <- Args]}]};
negate({[{<<"$default">>, _}]} = Arg) ->
?MANGO_ERROR({bad_arg, '$not', Arg});
% Negating comparison operators is straight forward
negate({[{<<"$lt">>, Arg}]}) ->
{[{<<"$gte">>, Arg}]};
negate({[{<<"$lte">>, Arg}]}) ->
{[{<<"$gt">>, Arg}]};
negate({[{<<"$eq">>, Arg}]}) ->
{[{<<"$ne">>, Arg}]};
negate({[{<<"$ne">>, Arg}]}) ->
{[{<<"$eq">>, Arg}]};
negate({[{<<"$gte">>, Arg}]}) ->
{[{<<"$lt">>, Arg}]};
negate({[{<<"$gt">>, Arg}]}) ->
{[{<<"$lte">>, Arg}]};
negate({[{<<"$in">>, Args}]}) ->
{[{<<"$nin">>, Args}]};
negate({[{<<"$nin">>, Args}]}) ->
{[{<<"$in">>, Args}]};
% We can also trivially negate the exists operator
negate({[{<<"$exists">>, Arg}]}) ->
{[{<<"$exists">>, not Arg}]};
% Anything else we have to just terminate the
% negation by reinserting the negation operator
negate({[{<<"$", _/binary>>, _}]} = Cond) ->
{[{<<"$not">>, Cond}]};
% Finally, negating a field just means we negate its
% condition.
negate({[{Field, Cond}]}) ->
{[{Field, negate(Cond)}]}.
match({[{<<"$and">>, Args}]}, Value, Cmp) ->
Pred = fun(SubSel) -> match(SubSel, Value, Cmp) end,
lists:all(Pred, Args);
match({[{<<"$or">>, Args}]}, Value, Cmp) ->
Pred = fun(SubSel) -> match(SubSel, Value, Cmp) end,
lists:any(Pred, Args);
match({[{<<"$not">>, Arg}]}, Value, Cmp) ->
not match(Arg, Value, Cmp);
% All of the values in Args must exist in Values or
% Values == hd(Args) if Args is a single element list
% that contains a list.
match({[{<<"$all">>, Args}]}, Values, _Cmp) when is_list(Values) ->
Pred = fun(A) -> lists:member(A, Values) end,
HasArgs = lists:all(Pred, Args),
IsArgs = case Args of
[A] when is_list(A) ->
A == Values;
_ ->
false
end,
HasArgs orelse IsArgs;
match({[{<<"$all">>, _Args}]}, _Values, _Cmp) ->
false;
%% This is for $elemMatch, $allMatch, and possibly $in because of our normalizer.
%% A selector such as {"field_name": {"$elemMatch": {"$gte": 80, "$lt": 85}}}
%% gets normalized to:
%% {[{<<"field_name">>,
%% {[{<<"$elemMatch">>,
%% {[{<<"$and">>, [
%% {[{<<>>,{[{<<"$gte">>,80}]}}]},
%% {[{<<>>,{[{<<"$lt">>,85}]}}]}
%% ]}]}
%% }]}
%% }]}.
%% So we filter out the <<>>.
match({[{<<>>, Arg}]}, Values, Cmp) ->
match(Arg, Values, Cmp);
% Matches when any element in values matches the
% sub-selector Arg.
match({[{<<"$elemMatch">>, Arg}]}, Values, Cmp) when is_list(Values) ->
try
lists:foreach(fun(V) ->
case match(Arg, V, Cmp) of
true -> throw(matched);
_ -> ok
end
end, Values),
false
catch
throw:matched ->
true;
_:_ ->
false
end;
match({[{<<"$elemMatch">>, _Arg}]}, _Value, _Cmp) ->
false;
% Matches when all elements in values match the
% sub-selector Arg.
match({[{<<"$allMatch">>, Arg}]}, Values, Cmp) when is_list(Values) ->
try
lists:foreach(fun(V) ->
case match(Arg, V, Cmp) of
false -> throw(unmatched);
_ -> ok
end
end, Values),
true
catch
_:_ ->
false
end;
match({[{<<"$allMatch">>, _Arg}]}, _Value, _Cmp) ->
false;
% Our comparison operators are fairly straight forward
match({[{<<"$lt">>, Arg}]}, Value, Cmp) ->
Cmp(Value, Arg) < 0;
match({[{<<"$lte">>, Arg}]}, Value, Cmp) ->
Cmp(Value, Arg) =< 0;
match({[{<<"$eq">>, Arg}]}, Value, Cmp) ->
Cmp(Value, Arg) == 0;
match({[{<<"$ne">>, Arg}]}, Value, Cmp) ->
Cmp(Value, Arg) /= 0;
match({[{<<"$gte">>, Arg}]}, Value, Cmp) ->
Cmp(Value, Arg) >= 0;
match({[{<<"$gt">>, Arg}]}, Value, Cmp) ->
Cmp(Value, Arg) > 0;
match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values)->
Pred = fun(Arg) ->
lists:foldl(fun(Value,Match) ->
(Cmp(Value, Arg) == 0) or Match
end, false, Values)
end,
lists:any(Pred, Args);
match({[{<<"$in">>, Args}]}, Value, Cmp) ->
Pred = fun(Arg) -> Cmp(Value, Arg) == 0 end,
lists:any(Pred, Args);
match({[{<<"$nin">>, Args}]}, Values, Cmp) when is_list(Values)->
not match({[{<<"$in">>, Args}]}, Values, Cmp);
match({[{<<"$nin">>, Args}]}, Value, Cmp) ->
Pred = fun(Arg) -> Cmp(Value, Arg) /= 0 end,
lists:all(Pred, Args);
% This logic is a bit subtle. Basically, if value is
% not undefined, then it exists.
match({[{<<"$exists">>, ShouldExist}]}, Value, _Cmp) ->
Exists = Value /= undefined,
ShouldExist andalso Exists;
match({[{<<"$type">>, Arg}]}, Value, _Cmp) when is_binary(Arg) ->
Arg == mango_json:type(Value);
match({[{<<"$mod">>, [D, R]}]}, Value, _Cmp) when is_integer(Value) ->
Value rem D == R;
match({[{<<"$mod">>, _}]}, _Value, _Cmp) ->
false;
match({[{<<"$regex">>, Regex}]}, Value, _Cmp) when is_binary(Value) ->
try
match == re:run(Value, Regex, [{capture, none}])
catch _:_ ->
false
end;
match({[{<<"$regex">>, _}]}, _Value, _Cmp) ->
false;
match({[{<<"$size">>, Arg}]}, Values, _Cmp) when is_list(Values) ->
length(Values) == Arg;
match({[{<<"$size">>, _}]}, _Value, _Cmp) ->
false;
% We don't have any choice but to believe that the text
% index returned valid matches
match({[{<<"$default">>, _}]}, _Value, _Cmp) ->
true;
% All other operators are internal assertion errors for
% matching because we either should've removed them during
% normalization or something else broke.
match({[{<<"$", _/binary>>=Op, _}]}, _, _) ->
?MANGO_ERROR({invalid_operator, Op});
% We need to traverse value to find field. The call to
% mango_doc:get_field/2 may return either not_found or
% bad_path in which case matching fails.
match({[{Field, Cond}]}, Value, Cmp) ->
case mango_doc:get_field(Value, Field) of
not_found when Cond == {[{<<"$exists">>, false}]} ->
true;
not_found ->
false;
bad_path ->
false;
SubValue when Field == <<"_id">> ->
match(Cond, SubValue, fun mango_json:cmp_raw/2);
SubValue ->
match(Cond, SubValue, Cmp)
end;
match({Props} = Sel, _Value, _Cmp) when length(Props) > 1 ->
erlang:error({unnormalized_selector, Sel}).