Teach fabric_util:get_db/2 about maintenance mode
If the node servicing a request does not hold a shard of the database
involved, fabric_util:get_db/2 can return a shard from a node that is
in maintenance mode. If that node is a replacement node that has not
yet been brought into the cluster, the security object read from that
shard will be empty.
Because fabric:get_security/2 calls fabric_util:get_db/2 and is in
the code path for authorizing requests at the HTTP layer, this can
result in live nodes returning 403s.
This commit replaces the rpc:call/4 in get_shard with a rexi:cast/4 and
adds a new RPC endpoint, fabric_rpc:open_shard/2, for opening single
shards. The new endpoint calls set_io_priority, which replies with a
rexi_EXIT if maintenance mode is set.
Closes COUCHDB-2325
diff --git a/src/fabric_rpc.erl b/src/fabric_rpc.erl
index eef9f30..b0b7776 100644
--- a/src/fabric_rpc.erl
+++ b/src/fabric_rpc.erl
@@ -18,7 +18,7 @@
-export([all_docs/3, changes/3, map_view/4, reduce_view/4, group_info/2]).
-export([create_db/1, delete_db/1, reset_validation_funs/1, set_security/3,
set_revs_limit/3, create_shard_db_doc/2, delete_shard_db_doc/2]).
--export([get_all_security/2]).
+-export([get_all_security/2, open_shard/2]).
-export([get_db_info/2, get_doc_count/2, get_update_seq/2,
changes/4, map_view/5, reduce_view/5, group_info/3]).
@@ -221,6 +221,15 @@
ok
end.
+open_shard(Name, Opts) ->
+ set_io_priority(Name, Opts),
+ case couch_db:open(Name, Opts) of
+ {ok, Db} ->
+ rexi:reply({ok, {ok, Db}});
+ Error ->
+ rexi:reply(Error)
+ end.
+
%%
%% internal
%%
diff --git a/src/fabric_util.erl b/src/fabric_util.erl
index 18ff578..d0b4ac9 100644
--- a/src/fabric_util.erl
+++ b/src/fabric_util.erl
@@ -176,15 +176,21 @@
get_shard([], _Opts, _Timeout, _Factor) ->
erlang:error({internal_server_error, "No DB shards could be opened."});
get_shard([#shard{node = Node, name = Name} | Rest], Opts, Timeout, Factor) ->
- case rpc:call(Node, couch_db, open, [Name, [{timeout, Timeout} | Opts]]) of
- {ok, Db} ->
- {ok, Db};
- {unauthorized, _} = Error ->
- throw(Error);
- {badrpc, {'EXIT', {timeout, _}}} ->
- get_shard(Rest, Opts, Factor * Timeout, Factor);
- _Else ->
- get_shard(Rest, Opts, Timeout, Factor)
+ Mon = rexi_monitor:start([rexi_utils:server_pid(Node)]),
+ MFA = {fabric_rpc, open_shard, [Name, [{timeout, Timeout} | Opts]]},
+ Ref = rexi:cast(Node, self(), MFA, [sync]),
+ try
+ receive {Ref, {ok, {ok, Db}}} ->
+ {ok, Db};
+ {Ref, {ok, {unauthorized, _} = Error}} ->
+ throw(Error);
+ _Else ->
+ get_shard(Rest, Opts, Timeout, Factor)
+ after Timeout ->
+ get_shard(Rest, Opts, Factor * Timeout, Factor)
+ end
+ after
+ rexi_monitor:stop(Mon)
end.
error_info({{<<"reduce_overflow_error">>, _} = Error, _Stack}) ->