blob: be0157d0c3b23765af7e904f53f61326fae46cdf [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.jackrabbit.oak.plugins.document;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
/**
* A ClusterView represents the state of a cluster at a particular moment in
* time.
* <p>
* This is a combination of what is stored in the ClusterViewDocument and the
* list of instances that currently have a backlog.
* <p>
* In order to be able to differentiate and clearly identify the different
* states an instance is in, the ClusterView uses a slightly different
* terminology of states that it reports:
* <ul>
* <li>Active: (same as in the ClusterViewDocument) an instance that is alive
* and has no recoveryLock set. Whether or not the lease has timed out is
* ignored. If the lease would be timed out, this would be immediately noticed
* by one of the instances and the affected instance would thus be recovered
* soon.</li>
* <li>Deactivating: An instance that is either recovering (which is the state
* reported from the ClusterViewDocument) - ie it was active until now but the
* lease has just timed out and one of the peer instances noticed so it does a
* recovery now - or it is inactive but some of its changes are still in the
* backlog (the latter is not tracked in the ClusterViewDocument, instead
* instances with a backlog are in the 'inactive' bucket there).</li>
* <li>Inactive: An instance that is both inactive from a
* clusterNodes/ClusterViewDocument point of view (ie no longer active and
* already recovered) and it has no backlog anymore.</li>
* </ul>
* The JSON generated by the ClusterView (which is propagated to JMX) has the
* following fields:
* <ul>
* <li>seq = sequence number: this is a monotonically increasing number assigned
* to each incarnation of the persisted clusterView (in the settings
* collection). It can be used to take note of the fact that a view has changed
* even though perhaps all activeIds are still the same (eg when the listener
* would have missed a few changes). It can also be used to tell with certainty
* that 'anything has changed' compared to the clusterView with a previous
* sequence number</li>
* <li>final = is final: this is a boolean indicating whether or not the view
* with a particular sequence number is final (not going to change anymore) or
* whether the discovery lite takes the freedom to modify the view in the future
* (false). So whenever 'final' is false, then the view must be treated as 'in
* flux' and the user should perhaps hold off on drawing any conclusions. That's not
* to say that if 'final' is false, that the information provided in
* active/deactivating/inactive is wrong - that's of course not the case - that
* info is always correct. But when 'final' is false it just means that
* active/deactivating/inactive for a given sequence number might change.</li>
* <li>id = cluster view id: this is the unique, stable identifier of the local
* cluster. The idea of this id is to provide both an actual identifier for the
* local cluster as well as a 'namespace' for the instanceIds therein. The
* instanceIds are all just simply integers and can of course be the same for
* instances in different clusters.</li>
* <li>me = my local instance id: this is the id of the local instance as
* managed by DocumentNodeStore</li>
* <li>active = active instance ids: this is the list of instance ids that are
* all currently active in the local cluster. The ids are managed by
* DocumentNodeStore</li>
* <li>deactivating = deactivating instance ids: this is the list of instance
* ids that are all in the process of deactivating and for which therefore some
* data might still be making its way to the local instance. So any changes that
* were done by instances that are deactivating might not yet be visible locally
* </li>
* <li>inactive = inactive instance ids: this is the list of instance ids that
* are not running nor do they have any data pending to become visible by the
* local instance</li>
* </ul>
*/
class ClusterView {

    /**
     * the json containing the complete information of the state of this
     * ClusterView. Created at constructor time for performance reasons (json
     * will be polled via JMX very frequently, thus must be provided fast)
     */
    private final String json;

    /**
     * Factory method that creates a ClusterView given a ClusterViewDocument and
     * a list of instances that currently have a backlog.
     * <p>
     * The ClusterViewDocument contains instances in the following states:
     * <ul>
     * <li>active</li>
     * <li>recovering</li>
     * <li>inactive</li>
     * </ul>
     * The ClusterView however reports these upwards as follows:
     * <ul>
     * <li>active: this is 1:1 the active ones from the ClusterViewDocument</li>
     * <li>deactivating: this includes the recovering ones from the
     * ClusterViewDocument plus those passed to this method in the backlogIds
     * parameter</li>
     * <li>inactive: this is the inactive ones from the ClusterViewDocument
     * <b>minus</b> the backlogIds passed</li>
     * </ul>
     * The resulting view is flagged as 'final' only if backlogIds is empty.
     *
     * @param localInstanceId
     *            the id of the local instance (me)
     * @param clusterId
     *            the id of the cluster, reported in the 'id' field of the json
     * @param clusterViewDoc
     *            the ClusterViewDocument which contains the currently persisted
     *            cluster view
     * @param backlogIds
     *            the ids of those instances for which the local instance has
     *            not yet finished a background read and which thus still have
     *            a backlog
     * @return the ClusterView representing the provided info
     * @throws IllegalStateException
     *             if at least one of the backlogIds is not listed as inactive
     *             in the ClusterViewDocument, or if any of the resulting values
     *             is rejected by the constructor
     */
    static ClusterView fromDocument(int localInstanceId, String clusterId, ClusterViewDocument clusterViewDoc, Set<Integer> backlogIds) {
        Set<Integer> activeIds = clusterViewDoc.getActiveIds();

        Set<Integer> deactivatingIds = new HashSet<Integer>();
        deactivatingIds.addAll(clusterViewDoc.getRecoveringIds());
        deactivatingIds.addAll(backlogIds);

        Set<Integer> inactiveIds = new HashSet<Integer>();
        inactiveIds.addAll(clusterViewDoc.getInactiveIds());
        // removeAll() returns true as soon as a single element got removed, so
        // it cannot be used to verify that *every* backlogId was listed as
        // inactive - containsAll() is the check that matches the expectation.
        if (!inactiveIds.containsAll(backlogIds)) {
            // then not all backlogIds were listed as inactive - which is
            // contrary to the expectation
            // in which case we indeed do a paranoia exception here:
            throw new IllegalStateException(
                    "not all backlogIds (" + backlogIds + ") are part of inactiveIds (" + clusterViewDoc.getInactiveIds() + ")");
        }
        inactiveIds.removeAll(backlogIds);

        // clusterViewDoc.getClusterViewId() used to provide the 'clusterViewId'
        // as defined within the settings collection of the DocumentStore.
        // with OAK-4006 however we're changing this to use one clusterId
        // within oak - provided and controlled by ClusterRepositoryInfo.
        return new ClusterView(clusterViewDoc.getViewSeqNum(), backlogIds.size() == 0, clusterId,
                localInstanceId, activeIds, deactivatingIds, inactiveIds);
    }

    /**
     * Validates the provided values and eagerly renders them into the json
     * that is later returned by {@link #asDescriptorValue()}.
     *
     * @param viewSeqNum
     *            the sequence number of the view, must be zero or higher
     * @param viewFinal
     *            whether or not the view is final
     * @param clusterId
     *            the id of the cluster, must not be null or empty
     * @param localId
     *            the id of the local instance, must be zero or higher
     * @param activeIds
     *            the ids of the active instances, must not be null or empty
     * @param deactivatingIds
     *            the ids of the deactivating instances, must not be null
     * @param inactiveIds
     *            the ids of the inactive instances, must not be null
     * @throws IllegalStateException
     *             if any of the above constraints is violated
     */
    ClusterView(final long viewSeqNum, final boolean viewFinal, final String clusterId, final int localId,
            final Set<Integer> activeIds, final Set<Integer> deactivatingIds, final Set<Integer> inactiveIds) {
        if (viewSeqNum < 0) {
            throw new IllegalStateException("viewSeqNum must be zero or higher: " + viewSeqNum);
        }
        if (clusterId == null || clusterId.length() == 0) {
            throw new IllegalStateException("clusterId must not be null or empty: " + clusterId);
        }
        if (localId < 0) {
            throw new IllegalStateException("localId must be zero or higher: " + localId);
        }
        if (activeIds == null || activeIds.size() == 0) {
            throw new IllegalStateException("activeIds must not be null or empty");
        }
        if (deactivatingIds == null) {
            throw new IllegalStateException("deactivatingIds must not be null");
        }
        if (inactiveIds == null) {
            throw new IllegalStateException("inactiveIds must not be null");
        }
        json = asJson(viewSeqNum, viewFinal, clusterId, localId, activeIds, deactivatingIds, inactiveIds);
    }

    /**
     * Converts the provided parameters into the clusterview json that will be
     * provided via JMX. The json is an object with the keys 'seq', 'final',
     * 'id', 'me', 'active', 'deactivating' and 'inactive' (in that order).
     **/
    private static String asJson(final long viewSeqNum, final boolean viewFinal, final String clusterId, final int localId,
            final Set<Integer> activeIds, final Set<Integer> deactivatingIds, final Set<Integer> inactiveIds) {
        JsopBuilder builder = new JsopBuilder();
        builder.object();
        builder.key("seq").value(viewSeqNum);
        builder.key("final").value(viewFinal);
        builder.key("id").value(clusterId);
        builder.key("me").value(localId);
        appendIdArray(builder, "active", activeIds);
        appendIdArray(builder, "deactivating", deactivatingIds);
        appendIdArray(builder, "inactive", inactiveIds);
        builder.endObject();
        return builder.toString();
    }

    /**
     * Appends the given instance ids as an array under the given key, in the
     * iteration order of the provided set.
     **/
    private static void appendIdArray(final JsopBuilder builder, final String key, final Set<Integer> ids) {
        builder.key(key).array();
        for (Integer anInstance : ids) {
            builder.value(anInstance);
        }
        builder.endArray();
    }

    /** Debugging toString() **/
    @Override
    public String toString() {
        return "a ClusterView[" + json + "]";
    }

    /** This is the main getter that will be polled via JMX **/
    String asDescriptorValue() {
        return json;
    }
}