blob: 909bf6bbe39269a1098fd1cb6d0953efbf401722 [file] [log] [blame]
package org.apache.commons.jcs3.auxiliary.remote;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.io.IOException;
import java.util.List;
import java.util.ListIterator;
import org.apache.commons.jcs3.auxiliary.AbstractAuxiliaryCacheMonitor;
import org.apache.commons.jcs3.auxiliary.remote.behavior.IRemoteCacheAttributes;
import org.apache.commons.jcs3.engine.CacheStatus;
import org.apache.commons.jcs3.engine.behavior.ICache;
/**
* The RemoteCacheFailoverRunner tries to establish a connection with a failover
* server, if any are defined. Once a failover connection is made, it will
* attempt to replace the failover with the primary remote server.
* <p>
* It works by switching out the RemoteCacheNoWait inside the Facade.
* <p>
* Client (i.e.) the CompositeCache has reference to a RemoteCacheNoWaitFacade.
* This facade is created by the RemoteCacheFactory. The factory maintains a set
* of managers, one for each remote server. Typically, there will only be one
* manager.
* <p>
* If you use multiple remote servers, you may want to set one or more as
* failovers. If a local cache cannot connect to the primary server, or looses
* its connection to the primary server, it will attempt to restore that
* Connection in the background. If failovers are defined, the Failover runner
* will try to connect to a failover until the primary is restored.
*
*/
public class RemoteCacheFailoverRunner<K, V> extends AbstractAuxiliaryCacheMonitor
{
/** The facade returned to the composite cache. */
private final RemoteCacheNoWaitFacade<K, V> facade;
/** Factory instance */
private final RemoteCacheFactory cacheFactory;
/**
* Constructor for the RemoteCacheFailoverRunner object. This allows the
* FailoverRunner to modify the facade that the CompositeCache references.
*
* @param facade the facade the CompositeCache talks to.
* @param cacheFactory the cache factory instance
*/
public RemoteCacheFailoverRunner( RemoteCacheNoWaitFacade<K, V> facade, RemoteCacheFactory cacheFactory )
{
super("JCS-RemoteCacheFailoverRunner");
this.facade = facade;
this.cacheFactory = cacheFactory;
setIdlePeriod(20000L);
}
/**
* Clean up all resources before shutdown
*/
@Override
protected void dispose()
{
// empty
}
/**
* do actual work
*/
@Override
protected void doWork()
{
// empty
}
/**
* Main processing method for the RemoteCacheFailoverRunner object.
* <p>
* If we do not have a connection with any failover server, this will try to
* connect one at a time. If no connection can be made, it goes to sleep for
* a while (20 seconds).
* <p>
* Once a connection with a failover is made, we will try to reconnect to
* the primary server.
* <p>
* The primary server is the first server defines in the FailoverServers
* list.
*/
@Override
public void run()
{
// start the main work of connecting to a failover and then restoring
// the primary.
connectAndRestore();
if ( log.isInfoEnabled() )
{
int failoverIndex = facade.getAuxiliaryCacheAttributes().getFailoverIndex();
log.info( "Exiting failover runner. Failover index = {0}", failoverIndex);
if ( failoverIndex <= 0 )
{
log.info( "Failover index is <= 0, meaning we are not connected to a failover server." );
}
else if ( failoverIndex > 0 )
{
log.info( "Failover index is > 0, meaning we are connected to a failover server." );
}
// log if we are allright or not.
}
}
/**
* This is the main loop. If there are failovers defined, then this will
* continue until the primary is re-connected. If no failovers are defined,
* this will exit automatically.
*/
private void connectAndRestore()
{
IRemoteCacheAttributes rca0 = facade.getAuxiliaryCacheAttributes();
do
{
log.info( "Remote cache FAILOVER RUNNING." );
// there is no active listener
if ( !allright.get() )
{
// Monitor each RemoteCacheManager instance one after the other.
// Each RemoteCacheManager corresponds to one remote connection.
List<RemoteLocation> failovers = rca0.getFailovers();
// we should probably check to see if there are any failovers,
// even though the caller
// should have already.
if ( failovers == null )
{
log.warn( "Remote is misconfigured, failovers was null." );
return;
}
else if ( failovers.size() == 1 )
{
// if there is only the primary, return out of this
log.info( "No failovers defined, exiting failover runner." );
return;
}
int fidx = rca0.getFailoverIndex();
log.debug( "fidx = {0} failovers.size = {1}", () -> fidx,
() -> failovers.size() );
// shouldn't we see if the primary is backup?
// If we don't check the primary, if it gets connected in the
// background,
// we will disconnect it only to put it right back
ListIterator<RemoteLocation> i = failovers.listIterator(fidx); // + 1; // +1 skips the primary
log.debug( "starting at failover i = {0}", i );
// try them one at a time until successful
for ( ; i.hasNext() && !allright.get();)
{
RemoteLocation server = i.next();
log.debug( "Trying server [{1}] at failover index i = {1}",
server, i );
RemoteCacheAttributes rca = (RemoteCacheAttributes) rca0.clone();
rca.setRemoteLocation(server);
RemoteCacheManager rcm = cacheFactory.getManager( rca );
log.debug( "RemoteCacheAttributes for failover = {0}", rca );
if (rcm != null)
{
// add a listener if there are none, need to tell rca
// what number it is at
ICache<K, V> ic = rcm.getCache( rca );
if ( ic.getStatus() == CacheStatus.ALIVE )
{
// may need to do this more gracefully
log.debug( "resetting no wait" );
facade.restorePrimaryServer((RemoteCacheNoWait<K, V>) ic);
rca0.setFailoverIndex( i.nextIndex() );
log.debug( "setting ALLRIGHT to true" );
if ( i.hasPrevious() )
{
log.debug( "Moving to Primary Recovery Mode, failover index = {0}", i );
}
else
{
log.debug( "No need to connect to failover, the primary server is back up." );
}
allright.set(true);
log.info( "CONNECTED to host = [{0}]",
() -> rca.getRemoteLocation() );
}
}
}
}
// end if !allright
// get here if while index >0 and allright, meaning that we are
// connected to some backup server.
else
{
log.debug( "ALLRIGHT is true " );
log.info( "Failover runner is in primary recovery mode. "
+ "Failover index = {0} Will now try to reconnect to "
+ "primary server.", () -> rca0.getFailoverIndex() );
}
boolean primaryRestoredSuccessfully = false;
// if we are not connected to the primary, try.
if ( rca0.getFailoverIndex() > 0 )
{
primaryRestoredSuccessfully = restorePrimary();
log.debug( "Primary recovery success state = {0}",
primaryRestoredSuccessfully );
}
if ( !primaryRestoredSuccessfully )
{
// Time driven mode: sleep between each round of recovery
// attempt.
try
{
log.warn( "Failed to reconnect to primary server. "
+ "Cache failover runner is going to sleep for "
+ "{0} milliseconds.", idlePeriod );
Thread.sleep( idlePeriod );
}
catch ( InterruptedException ex )
{
// ignore;
}
}
// try to bring the listener back to the primary
}
while ( rca0.getFailoverIndex() > 0 || !allright.get() );
// continue if the primary is not restored or if things are not allright.
}
/**
* Try to restore the primary server.
* <p>
* Once primary is restored the failover listener must be deregistered.
* <p>
* The primary server is the first server defines in the FailoverServers
* list.
*
* @return boolean value indicating whether the restoration was successful
*/
private boolean restorePrimary()
{
IRemoteCacheAttributes rca0 = facade.getAuxiliaryCacheAttributes();
// try to move back to the primary
RemoteLocation server = rca0.getFailovers().get(0);
log.info( "Trying to restore connection to primary remote server "
+ "[{0}]", server );
RemoteCacheAttributes rca = (RemoteCacheAttributes) rca0.clone();
rca.setRemoteLocation(server);
RemoteCacheManager rcm = cacheFactory.getManager( rca );
if (rcm != null)
{
// add a listener if there are none, need to tell rca what number it
// is at
ICache<K, V> ic = rcm.getCache( rca );
// by default the listener id should be 0, else it will be the
// listener
// Originally associated with the remote cache. either way is fine.
// We just don't want the listener id from a failover being used.
// If the remote server was rebooted this could be a problem if new
// locals were also added.
if ( ic.getStatus() == CacheStatus.ALIVE )
{
try
{
// we could have more than one listener registered right
// now.
// this will not result in a loop, only duplication
// stop duplicate listening.
if ( facade.getPrimaryServer() != null && facade.getPrimaryServer().getStatus() == CacheStatus.ALIVE )
{
int fidx = rca0.getFailoverIndex();
if ( fidx > 0 )
{
RemoteLocation serverOld = rca0.getFailovers().get(fidx);
log.debug( "Failover Index = {0} the server at that "
+ "index is [{1}]", fidx, serverOld );
if ( serverOld != null )
{
// create attributes that reflect the
// previous failed over configuration.
RemoteCacheAttributes rcaOld = (RemoteCacheAttributes) rca0.clone();
rcaOld.setRemoteLocation(serverOld);
RemoteCacheManager rcmOld = cacheFactory.getManager( rcaOld );
if ( rcmOld != null )
{
// manager can remove by name if
// necessary
rcmOld.removeRemoteCacheListener( rcaOld );
}
log.info( "Successfully deregistered from "
+ "FAILOVER remote server = {0}", serverOld );
}
}
else if ( fidx == 0 )
{
// this should never happen. If there are no
// failovers this shouldn't get called.
if ( log.isDebugEnabled() )
{
log.debug( "No need to restore primary, it is already restored." );
return true;
}
}
else if ( fidx < 0 )
{
// this should never happen
log.warn( "Failover index is less than 0, this shouldn't happen" );
}
}
}
catch ( IOException e )
{
// TODO, should try again, or somehow stop the listener
log.error("Trouble trying to deregister old failover "
+ "listener prior to restoring the primary = {0}",
server, e );
}
// Restore primary
// may need to do this more gracefully, letting the failover finish in the background
RemoteCacheNoWait<K, V> failoverNoWait = facade.getPrimaryServer();
// swap in a new one
facade.restorePrimaryServer((RemoteCacheNoWait<K, V>) ic);
rca0.setFailoverIndex( 0 );
String message = "Successfully reconnected to PRIMARY "
+ "remote server. Substituted primary for "
+ "failoverNoWait [" + failoverNoWait + "]";
log.info( message );
if ( facade.getCacheEventLogger() != null )
{
facade.getCacheEventLogger().logApplicationEvent(
"RemoteCacheFailoverRunner", "RestoredPrimary",
message );
}
return true;
}
}
// else all right
// if the failover index was at 0 here, we would be in a bad
// situation, unless there were just
// no failovers configured.
log.debug( "Primary server status in error, not connected." );
return false;
}
}