Use the literal text prefix of a $regex to speed up the query
For the selector:
{"selector":{"_id":{"$regex":"doc.+"}}}
Before:
{
"include_docs": true,
"view_type": "map",
"reduce": false,
"partition": null,
"start_key": [],
"end_key": [
"<MAX>"
],
"direction": "fwd",
"stable": false,
"update": true,
"conflicts": "undefined"
}
After:
{
"include_docs": true,
"view_type": "map",
"reduce": false,
"partition": null,
"start_key": [
"doc"
],
"end_key": [
"doc\udbff\udfff",
"<MAX>"
],
"direction": "fwd",
"stable": false,
"update": true,
"conflicts": "undefined"
}
closes: https://github.com/apache/couchdb/issues/4775
diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl
index 25d75d5..0bdd4ab 100644
--- a/src/mango/src/mango_idx_view.erl
+++ b/src/mango/src/mango_idx_view.erl
@@ -35,6 +35,8 @@
-include("mango.hrl").
-include("mango_idx_view.hrl").
+% Extracts a literal word-character prefix from the *source* of a regex.
+% A prefix is only reported when every string the regex can match is
+% guaranteed to start with it:
+%  - the pattern must begin with `^`; an unanchored pattern can match in
+%    the middle of a value, so no prefix range would be safe. `\K` drops
+%    the `^` from the reported match;
+%  - a character followed by `*`, `?` or `{` may be absent from a match,
+%    so the lookahead makes `\w+` back off by one character
+%    (`^doc?x` yields `do`, `^d*` yields no match);
+%  - patterns containing `|` anywhere are rejected, because alternatives
+%    need not share the prefix (`^doc|foo`).
+-define(PREFIX_RE, "^\\^\\K\\w+(?=(?![*?{])[^|]*$)").
+
validate_new(#idx{} = Idx, _Db) ->
{ok, Def} = do_validate(Idx#idx.def),
{ok, Idx#idx{def = Def}}.
@@ -310,6 +312,8 @@
% Making `$exists` indexable should not cause problems in other cases.
indexable({[{<<"$exists">>, _}]}) ->
true;
+% `$regex` is reported as indexable; presumably so that the `$regex`
+% clause of range/5 can narrow the scan using a literal text prefix of
+% the pattern (NOTE(review): confirm against the index selection code).
+indexable({[{<<"$regex">>, _}]}) ->
+ true;
% All other operators are currently not indexable.
% This is also a subtle assertion that we don't
% call indexable/1 on a field name.
@@ -485,6 +489,14 @@
max ->
empty
end;
+% Narrow the range using the literal text prefix of a `$regex`, if
+% ?PREFIX_RE finds one; otherwise the incoming range is returned as is.
+% The upper bound is the prefix followed by U+10FFFF, the highest code
+% point. The `/utf8` type is required: a bare integer segment is 8 bits
+% wide, so `16#10FFFF` would be truncated to the single byte 16#FF,
+% which is not valid UTF-8.
+% NOTE(review): on a match the incoming Low/High are replaced rather
+% than intersected with the prefix range; confirm that is intended.
+range({[{<<"$regex">>, Arg}]}, LCmp, Low, HCmp, High) ->
+    case re:run(Arg, ?PREFIX_RE, [{capture, first, binary}]) of
+        {match, [Prefix]} ->
+            {'$gte', Prefix, '$lte', <<Prefix/binary, 16#10FFFF/utf8>>};
+        nomatch ->
+            {LCmp, Low, HCmp, High}
+    end;
% There's some other un-indexable restriction on the index
% that will be applied as a post-filter. Ignore it and
% carry on our merry way.