blob: 0cc713ff2e0d6e7a32e816d569986a1c887130c8 [file] [log] [blame]
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.
-module(mango_idx_view).
-export([
validate_new/1,
validate_index_def/1,
add/2,
remove/2,
from_ddoc/1,
to_json/1,
is_usable/2,
columns/1,
start_key/1,
end_key/1,
indexable_fields/1,
field_ranges/1,
field_ranges/2
]).
-include_lib("couch/include/couch_db.hrl").
-include("mango.hrl").
-include("mango_idx.hrl").
% Validate the definition carried by a new index.
%
% Returns {ok, Idx} with the definition replaced by the result of
% do_validate/1. Crashes with badmatch if do_validate/1 returns
% anything other than {ok, Def}.
validate_new(#idx{def = RawDef} = Idx) ->
    {ok, ValidDef} = do_validate(RawDef),
    {ok, Idx#idx{def = ValidDef}}.
% Check an index definition by running it through def_to_json/1.
%
% Returns the converted property list; a definition that
% def_to_json/1 has no clause for makes this call fail.
validate_index_def(Def) ->
    Json = def_to_json(Def),
    Json.
% Add the view for Idx to a design document, replacing any view
% that is already stored under the same name.
%
% Returns {ok, DDoc} with the updated body.
add(#doc{body = {BodyProps}} = DDoc, Idx) ->
    % A missing or malformed "views" member is treated as no views.
    CurrentViews =
        case proplists:get_value(<<"views">>, BodyProps) of
            {Views} -> Views;
            _ -> []
        end,
    {ViewName, _ViewBody} = NewView = make_view(Idx),
    UpdatedViews = lists:keystore(ViewName, 1, CurrentViews, NewView),
    ViewsProp = {<<"views">>, {UpdatedViews}},
    NewBody = {lists:keystore(<<"views">>, 1, BodyProps, ViewsProp)},
    {ok, DDoc#doc{body = NewBody}}.
% Remove the view named after Idx from a design document.
%
% Signals {index_not_found, Name} through ?MANGO_ERROR when the
% document has no "views" object or no view with that name.
% When the last view is removed the "views" member is dropped
% entirely. Returns {ok, DDoc} with the updated body.
remove(#doc{body = {BodyProps}} = DDoc, Idx) ->
    Name = Idx#idx.name,
    CurrentViews =
        case proplists:get_value(<<"views">>, BodyProps) of
            {Views} ->
                Views;
            _ ->
                ?MANGO_ERROR({index_not_found, Name})
        end,
    RemainingViews = lists:keydelete(Name, 1, CurrentViews),
    % An unchanged list means nothing was stored under Name.
    case RemainingViews == CurrentViews of
        true ->
            ?MANGO_ERROR({index_not_found, Name});
        false ->
            ok
    end,
    NewProps =
        case RemainingViews of
            [] ->
                lists:keydelete(<<"views">>, 1, BodyProps);
            _ ->
                ViewsProp = {<<"views">>, {RemainingViews}},
                lists:keystore(<<"views">>, 1, BodyProps, ViewsProp)
        end,
    {ok, DDoc#doc{body = {NewProps}}}.
% Build the list of #idx{} records described by the "views"
% member of a design document body.
%
% Views rejected by validate_ddoc/1 are skipped. A body without a
% "views" object yields an empty list.
from_ddoc({Props}) ->
    case lists:keyfind(<<"views">>, 1, Props) of
        {<<"views">>, {Views}} when is_list(Views) ->
            ToIdx = fun({Name, {VProps}}) ->
                case validate_ddoc(VProps) of
                    invalid_view ->
                        false;
                    {Def, Opts} ->
                        {true, #idx{
                            type = <<"json">>,
                            name = Name,
                            def = Def,
                            opts = Opts
                        }}
                end
            end,
            lists:filtermap(ToIdx, Views);
        _ ->
            []
    end.
% Render an index as an EJSON object with the members ddoc, name,
% type and def (the latter converted with def_to_json/1).
to_json(Idx) ->
    DDoc = Idx#idx.ddoc,
    Name = Idx#idx.name,
    Type = Idx#idx.type,
    Def = {def_to_json(Idx#idx.def)},
    {[
        {ddoc, DDoc},
        {name, Name},
        {type, Type},
        {def, Def}
    ]}.
% Return the field names listed under "fields" in the index
% definition, in definition order.
%
% Crashes with badmatch if the definition has no "fields" object.
columns(Idx) ->
    {DefProps} = Idx#idx.def,
    {<<"fields">>, {SortFields}} = lists:keyfind(<<"fields">>, 1, DefProps),
    [FieldName || {FieldName, _Direction} <- SortFields].
% Decide whether this index can serve the selector.
%
% The index is usable when its first column is one of the
% selector's indexable fields and the selector is not a text
% search.
is_usable(Idx, Selector) ->
    Columns = columns(Idx),
    SelectorFields = indexable_fields(Selector),
    case lists:member(hd(Columns), SelectorFields) of
        true ->
            not is_text_search(Selector);
        false ->
            false
    end.
% Report whether a selector contains a "$default" clause anywhere
% inside it, which marks it as a text search.
%
% The selector is walked recursively: list conditions (such as the
% arguments of '$and' or '$in') are searched element by element
% and object conditions are descended into. Anything that turns
% out to be a literal JSON value yields false.
is_text_search({[]}) ->
    false;
is_text_search({[{<<"$default">>, _}]}) ->
    true;
is_text_search({[{_Field, Cond}]}) when is_list(Cond) ->
    lists:any(fun is_text_search/1, Cond);
is_text_search({[{_Field, Cond}]}) when is_tuple(Cond) ->
    is_text_search(Cond);
is_text_search({[{_Field, _Cond}]}) ->
    false;
% An object with several members (for instance an object literal
% used as a comparison value): inspect each member on its own.
% Requiring at least two members keeps this clause from matching
% the single-member shapes handled above.
is_text_search({[_, _ | _] = Props}) ->
    lists:any(fun(Prop) -> is_text_search({[Prop]}) end, Props);
% A nested array value, e.g. {"$in": [[1, 2]]}.
is_text_search(Vals) when is_list(Vals) ->
    lists:any(fun is_text_search/1, Vals);
% We reached plain values, which are never text searches. null
% must be listed explicitly: it is an atom that is_boolean/1
% rejects, so {"$in": [null]} used to raise function_clause.
is_text_search(null) ->
    false;
is_text_search(Val) when is_number(Val); is_boolean(Val); is_binary(Val) ->
    false.
% Build the view start key from a list of per-field ranges
% {LowCmp, Low, HighCmp, High}.
%
% A '$gt' bound whose value mango_json:special/1 flags ends the
% key at that point. '$gte' and '$eq' bounds assert that their
% value is not special and crash with badmatch otherwise.
start_key([]) ->
    [];
start_key([{'$gt', Low, _, _} | Ranges]) ->
    case mango_json:special(Low) of
        false ->
            [Low | start_key(Ranges)];
        true ->
            []
    end;
start_key([{'$gte', Low, _, _} | Ranges]) ->
    false = mango_json:special(Low),
    [Low | start_key(Ranges)];
start_key([{'$eq', Value, '$eq', Value} | Ranges]) ->
    false = mango_json:special(Value),
    [Value | start_key(Ranges)].
% Build the view end key from a list of per-field ranges
% {LowCmp, Low, HighCmp, High}.
%
% The key is terminated with an empty object {[]}, both when the
% ranges run out and when a '$lt' bound carries a value that
% mango_json:special/1 flags. '$lte' and '$eq' bounds assert that
% their value is not special and crash with badmatch otherwise.
end_key([]) ->
    [{[]}];
end_key([{_, _, '$lt', High} | Ranges]) ->
    case mango_json:special(High) of
        false ->
            [High | end_key(Ranges)];
        true ->
            [{[]}]
    end;
end_key([{_, _, '$lte', High} | Ranges]) ->
    false = mango_json:special(High),
    [High | end_key(Ranges)];
end_key([{'$eq', Value, '$eq', Value} | Ranges]) ->
    false = mango_json:special(Value),
    [Value | end_key(Ranges)].
% Validate an index definition against the schema from opts/0.
%
% Returns {ok, {Opts}} for an EJSON object; any other term is
% reported as {invalid_index_json, Term} through ?MANGO_ERROR.
do_validate(Def) ->
    case Def of
        {Props} ->
            Schema = opts(),
            {ok, Validated} = mango_opts:validate(Props, Schema),
            {ok, {Validated}};
        _ ->
            ?MANGO_ERROR({invalid_index_json, Def})
    end.
% Convert an index definition to a JSON-ready property list.
%
% Accepts either an EJSON object {Props} or a bare property list.
% The "fields" entry (keyed by the atom fields or the binary
% <<"fields">>) is rendered with mango_sort:to_json/1 and always
% emitted under the binary key; every other pair passes through
% unchanged.
def_to_json({Props}) ->
    def_to_json(Props);
def_to_json(Props) when is_list(Props) ->
    Convert = fun
        ({Key, Fields}) when Key =:= fields; Key =:= <<"fields">> ->
            {<<"fields">>, mango_sort:to_json(Fields)};
        ({_Key, _Value} = Pair) ->
            Pair
    end,
    lists:map(Convert, Props).
% Option schema handed to mango_opts:validate/2: a single
% "fields" option, tagged fields and checked with
% mango_opts:validate_sort/1.
opts() ->
    FieldsSpec = [
        {tag, fields},
        {validator, fun mango_opts:validate_sort/1}
    ],
    [{<<"fields">>, FieldsSpec}].
% Build the {Name, ViewObject} pair stored under "views" in the
% design document. The index definition is used as the view's
% "map" member and the reduce is always "_count".
make_view(Idx) ->
    ViewProps = [
        {<<"map">>, Idx#idx.def},
        {<<"reduce">>, <<"_count">>},
        {<<"options">>, {Idx#idx.opts}}
    ],
    {Idx#idx.name, {ViewProps}}.
% Extract {Def, Opts} from the properties of one view.
%
% Def is the "map" member, accepted only if validate_index_def/1
% succeeds on it. Opts is the "options" object with any "sort"
% entry removed. Any exception raised along the way is logged and
% turned into the atom invalid_view.
validate_ddoc(VProps) ->
    try
        MapDef = proplists:get_value(<<"map">>, VProps),
        validate_index_def(MapDef),
        {RawOpts} = proplists:get_value(<<"options">>, VProps),
        {MapDef, lists:keydelete(<<"sort">>, 1, RawOpts)}
    catch
        Class:Reason ->
            couch_log:error(
                "Invalid Index Def ~p. Error: ~p, Reason: ~p",
                [VProps, Class, Reason]
            ),
            invalid_view
    end.
% This function returns the sorted, de-duplicated list of field
% names in the selector that could be served by an index. It
% works by searching the selector for field names that can be
% "seen".
%
% Operators that can be seen through are '$and' and any of
% the logical comparisons ('$lt', '$eq', etc). Things like
% '$regex', '$in', '$nin', and '$or' can't be serviced by
% a single index scan so we disallow them. In the future
% we may become more clever and increase our ken such that
% we will be able to see through these with crafty indexes
% or new uses for existing indexes. For instance, an '$or'
% between comparisons on the same field could become the
% equivalent of a multi-query. But that's for another day.

% '$and' is transparent: collect the fields of every argument.
indexable_fields({[{<<"$and">>, Args}]}) ->
    PerArg = lists:map(fun indexable_fields/1, Args),
    lists:usort(lists:flatten(PerArg));
% No other operator can be seen through so far.
indexable_fields({[{<<"$", _/binary>>, _}]}) ->
    [];
% A field counts when its condition can be located with an index.
indexable_fields({[{Field, Cond}]}) ->
    [Field || indexable(Cond)];
% An empty selector has no fields.
indexable_fields({[]}) ->
    [].
% Check if a condition is indexable. Only the five logical
% comparisons '$lt', '$lte', '$eq', '$gt' and '$gte' qualify.
% We currently don't understand '$in', which is theoretically
% supportable. '$nin' and '$ne' aren't currently supported
% because they require multiple index scans.
%
% Every other operator is not indexable. Matching only keys that
% start with "$" is also a subtle assertion that indexable/1 is
% never called on a field name.
indexable({[{<<"$", _/binary>> = Op, _}]}) ->
    lists:member(Op, [
        <<"$lt">>,
        <<"$lte">>,
        <<"$eq">>,
        <<"$gt">>,
        <<"$gte">>
    ]).
% For each indexable field of the selector, return {Field, Range}.
% The fields are taken from indexable_fields/1.
field_ranges(Selector) ->
    field_ranges(Selector, indexable_fields(Selector)).
% Return {Field, Range} for each of the given fields, in the
% order the fields were supplied.
%
% As soon as one field has an empty range the result collapses to
% the single pair [{Field, empty}] for that field.
field_ranges(Selector, Fields) ->
    field_ranges(Selector, Fields, []).

% Accumulator-based worker for field_ranges/2; Ranges is built in
% reverse and flipped once the fields are exhausted.
field_ranges(_Selector, [], Ranges) ->
    lists:reverse(Ranges);
field_ranges(Selector, [Field | Remaining], Ranges) ->
    case range(Selector, Field) of
        empty ->
            [{Field, empty}];
        FieldRange ->
            field_ranges(Selector, Remaining, [{Field, FieldRange} | Ranges])
    end.
% Find the complete range for a given index field in this
% selector. Logical comparisons are AND'ed together so that a
% start and end key can be defined for the field.
%
% The search starts from the widest range, the open interval
% between mango_json:min() and mango_json:max().
%
% Selector must have been normalized before calling this
% function.
range(Selector, Index) ->
    Lowest = mango_json:min(),
    Highest = mango_json:max(),
    range(Selector, Index, '$gt', Lowest, '$lt', Highest).
% Adjust Low and High based on values found for the
% given Index in Selector.
%
% Returns either the narrowed {LCmp, Low, HCmp, High} tuple or
% the atom empty when the conditions cannot all hold.

% '$and': thread the range through every argument in turn. Once
% an argument has made the range empty it stays empty.
range({[{<<"$and">>, Args}]}, Index, LCmp, Low, HCmp, High) ->
    lists:foldl(fun
        (Arg, {LC, L, HC, H}) ->
            range(Arg, Index, LC, L, HC, H);
        (_Arg, empty) ->
            empty
    end, {LCmp, Low, HCmp, High}, Args);
% We can currently only traverse '$and' operators. The pattern
% has to be a {Key, Value} pair; a one-element tuple here would
% never match a selector member and leave this clause dead.
range({[{<<"$", _/binary>>, _}]}, _Index, LCmp, Low, HCmp, High) ->
    {LCmp, Low, HCmp, High};
% If the field name matches the index see if we can narrow
% the acceptable range.
range({[{Index, Cond}]}, Index, LCmp, Low, HCmp, High) ->
    range(Cond, LCmp, Low, HCmp, High);
% Else we have a field unrelated to this index so just
% return the current values.
range(_, _, LCmp, Low, HCmp, High) ->
    {LCmp, Low, HCmp, High}.
% Narrow the range {LCmp, Low, HCmp, High} with one condition on
% the indexed field. Returns the narrowed four-tuple, or the atom
% empty when the condition cannot be satisfied inside the range.
%
% The comments below are a bit cryptic at first but they show
% where the Arg can land in the current range.
%
% For instance, given:
%
% {$lt: N}
% Low = 1
% High = 5
%
% Depending on the value of N we can have one of five locations
% in regards to a given Low/High pair:
%
% min low mid high max
%
% That is:
% min = (N < Low)
% low = (N == Low)
% mid = (Low < N < High)
% high = (N == High)
% max = (High < N)
%
% If N < 1, (min) then the effective range is empty.
%
% If N == 1, (low) then we have to set the range to empty because
% N < 1 && N >= 1 is an empty set. If the operator had been '$lte'
% and LCmp was '$gte' or '$eq' then we could keep around the equality
% check on Arg by setting LCmp == HCmp = '$eq' and Low == High == Arg.
%
% If 1 < N < 5 (mid), then we set High to Arg and Arg has just
% narrowed our range. HCmp is set to the '$lt' operator that was
% part of the input.
%
% If N == 5 (high), we just set HCmp to '$lt' since it's guaranteed
% to be equally or more restrictive than the current possible values
% of '$lt' or '$lte'.
%
% If N > 5 (max), nothing changes as our current range is already
% more narrow than the current condition.
%
% Obviously all of that logic gets tweaked for the other logical
% operators but it's all straightforward once you figure out how
% we're basically just narrowing our logical ranges.
range({[{<<"$lt">>, Arg}]}, LCmp, Low, HCmp, High) ->
    case range_pos(Low, Arg, High) of
        min ->
            empty;
        low ->
            empty;
        mid ->
            {LCmp, Low, '$lt', Arg};
        high ->
            {LCmp, Low, '$lt', Arg};
        max ->
            {LCmp, Low, HCmp, High}
    end;
range({[{<<"$lte">>, Arg}]}, LCmp, Low, HCmp, High) ->
    case range_pos(Low, Arg, High) of
        min ->
            empty;
        low when LCmp == '$gte'; LCmp == '$eq' ->
            {'$eq', Arg, '$eq', Arg};
        low ->
            empty;
        mid ->
            {LCmp, Low, '$lte', Arg};
        high ->
            {LCmp, Low, HCmp, High};
        max ->
            {LCmp, Low, HCmp, High}
    end;
range({[{<<"$eq">>, Arg}]}, LCmp, Low, HCmp, High) ->
    case range_pos(Low, Arg, High) of
        min ->
            empty;
        low when LCmp == '$gte'; LCmp == '$eq' ->
            {'$eq', Arg, '$eq', Arg};
        low ->
            empty;
        mid ->
            {'$eq', Arg, '$eq', Arg};
        high when HCmp == '$lte'; HCmp == '$eq' ->
            {'$eq', Arg, '$eq', Arg};
        high ->
            empty;
        max ->
            empty
    end;
range({[{<<"$gte">>, Arg}]}, LCmp, Low, HCmp, High) ->
    case range_pos(Low, Arg, High) of
        min ->
            {LCmp, Low, HCmp, High};
        low ->
            {LCmp, Low, HCmp, High};
        mid ->
            {'$gte', Arg, HCmp, High};
        high when HCmp == '$lte'; HCmp == '$eq' ->
            {'$eq', Arg, '$eq', Arg};
        high ->
            empty;
        max ->
            empty
    end;
range({[{<<"$gt">>, Arg}]}, LCmp, Low, HCmp, High) ->
    case range_pos(Low, Arg, High) of
        min ->
            {LCmp, Low, HCmp, High};
        low when LCmp == '$eq' ->
            % The range is already pinned to the single value Arg
            % ('$eq' is only ever produced together with
            % Low == High), so "> Arg" leaves nothing. Returning
            % {'$gt', Arg, '$eq', Arg} instead would hand
            % end_key/1 a shape it has no clause for.
            empty;
        low ->
            {'$gt', Arg, HCmp, High};
        mid ->
            {'$gt', Arg, HCmp, High};
        high ->
            empty;
        max ->
            empty
    end;
% There's some other un-indexable restriction on the index
% that will be applied as a post-filter. Ignore it and
% carry on our merry way.
range({[{<<"$", _/binary>>, _}]}, LCmp, Low, HCmp, High) ->
    {LCmp, Low, HCmp, High}.
% Classify Arg relative to the bounds Low and High using
% mango_json:cmp/2. The result is one of:
%
% min  - Arg sorts before Low
% low  - Arg compares equal to Low
% mid  - Arg sorts between Low and High
% high - Arg compares equal to High
% max  - Arg sorts after High
%
% Low is checked first, so when Low and High compare equal an
% Arg equal to both is reported as low.
range_pos(Low, Arg, High) ->
    LowCmp = mango_json:cmp(Arg, Low),
    if
        LowCmp < 0 ->
            min;
        LowCmp == 0 ->
            low;
        true ->
            HighCmp = mango_json:cmp(Arg, High),
            if
                HighCmp < 0 -> mid;
                HighCmp == 0 -> high;
                true -> max
            end
    end.