| |
| <!DOCTYPE HTML> |
<html lang="en">
| <head> |
| <meta charset="UTF-8"> |
| <meta content="text/html; charset=utf-8" http-equiv="Content-Type"> |
<title>High Availability and Failover · ActiveMQ Artemis Documentation</title>
| <meta http-equiv="X-UA-Compatible" content="IE=edge" /> |
| <meta name="description" content=""> |
| <meta name="generator" content="GitBook 3.2.3"> |
| |
| |
| |
| |
| <link rel="stylesheet" href="gitbook/style.css"> |
| |
| |
| |
| |
| <link rel="stylesheet" href="gitbook/gitbook-plugin-highlight/website.css"> |
| |
| |
| |
| <link rel="stylesheet" href="gitbook/gitbook-plugin-search/search.css"> |
| |
| |
| |
| <link rel="stylesheet" href="gitbook/gitbook-plugin-fontsettings/website.css"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <meta name="HandheldFriendly" content="true"/> |
| <meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no"> |
| <meta name="apple-mobile-web-app-capable" content="yes"> |
| <meta name="apple-mobile-web-app-status-bar-style" content="black"> |
| <link rel="apple-touch-icon-precomposed" sizes="152x152" href="gitbook/images/apple-touch-icon-precomposed-152.png"> |
| <link rel="shortcut icon" href="gitbook/images/favicon.ico" type="image/x-icon"> |
| |
| |
| <link rel="next" href="graceful-shutdown.html" /> |
| |
| |
| <link rel="prev" href="clusters.html" /> |
| |
| |
| </head> |
| <body> |
| |
| <div class="book"> |
| <div class="book-summary"> |
| |
| |
| <div id="book-search-input" role="search"> |
| <input type="text" placeholder="Type to search" /> |
| </div> |
| |
| |
| <nav role="navigation"> |
| |
| |
| |
| <ul class="summary"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="chapter " data-level="1.1" data-path="./"> |
| |
| <a href="./"> |
| |
| |
| Introduction |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.2" data-path="notice.html"> |
| |
| <a href="notice.html"> |
| |
| |
| Legal Notice |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.3" data-path="preface.html"> |
| |
| <a href="preface.html"> |
| |
| |
| Preface |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.4" data-path="project-info.html"> |
| |
| <a href="project-info.html"> |
| |
| |
| Project Info |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.5" data-path="messaging-concepts.html"> |
| |
| <a href="messaging-concepts.html"> |
| |
| |
| Messaging Concepts |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.6" data-path="architecture.html"> |
| |
| <a href="architecture.html"> |
| |
| |
| Architecture |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.7" data-path="using-server.html"> |
| |
| <a href="using-server.html"> |
| |
| |
| Using the Server |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.8" data-path="address-model.html"> |
| |
| <a href="address-model.html"> |
| |
| |
| Address Model |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.9" data-path="using-jms.html"> |
| |
| <a href="using-jms.html"> |
| |
| |
| Using JMS |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.10" data-path="using-core.html"> |
| |
| <a href="using-core.html"> |
| |
| |
| Using Core |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.11" data-path="using-AMQP.html"> |
| |
| <a href="using-AMQP.html"> |
| |
| |
| Using AMQP |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.12" data-path="jms-core-mapping.html"> |
| |
| <a href="jms-core-mapping.html"> |
| |
| |
| Mapping JMS Concepts to the Core API |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.13" data-path="client-classpath.html"> |
| |
| <a href="client-classpath.html"> |
| |
| |
| The Client Classpath |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.14" data-path="examples.html"> |
| |
| <a href="examples.html"> |
| |
| |
| Examples |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.15" data-path="wildcard-routing.html"> |
| |
| <a href="wildcard-routing.html"> |
| |
| |
| Routing Messages With Wild Cards |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.16" data-path="wildcard-syntax.html"> |
| |
| <a href="wildcard-syntax.html"> |
| |
| |
| Understanding the Apache ActiveMQ Artemis Wildcard Syntax |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.17" data-path="filter-expressions.html"> |
| |
| <a href="filter-expressions.html"> |
| |
| |
| Filter Expressions |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.18" data-path="persistence.html"> |
| |
| <a href="persistence.html"> |
| |
| |
| Persistence |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.19" data-path="configuring-transports.html"> |
| |
| <a href="configuring-transports.html"> |
| |
| |
| Configuring Transports |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.20" data-path="config-reload.html"> |
| |
| <a href="config-reload.html"> |
| |
| |
| Configuration Reload |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.21" data-path="connection-ttl.html"> |
| |
| <a href="connection-ttl.html"> |
| |
| |
| Detecting Dead Connections |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.22" data-path="slow-consumers.html"> |
| |
| <a href="slow-consumers.html"> |
| |
| |
| Detecting Slow Consumers |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.23" data-path="network-isolation.html"> |
| |
| <a href="network-isolation.html"> |
| |
| |
| Avoiding Network Isolation |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.24" data-path="critical-analysis.html"> |
| |
| <a href="critical-analysis.html"> |
| |
| |
| Detecting Broker Issues (Critical Analysis) |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.25" data-path="transaction-config.html"> |
| |
| <a href="transaction-config.html"> |
| |
| |
| Resource Manager Configuration |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.26" data-path="flow-control.html"> |
| |
| <a href="flow-control.html"> |
| |
| |
| Flow Control |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.27" data-path="send-guarantees.html"> |
| |
| <a href="send-guarantees.html"> |
| |
| |
| Guarantees of sends and commits |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.28" data-path="undelivered-messages.html"> |
| |
| <a href="undelivered-messages.html"> |
| |
| |
| Message Redelivery and Undelivered Messages |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.29" data-path="message-expiry.html"> |
| |
| <a href="message-expiry.html"> |
| |
| |
| Message Expiry |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.30" data-path="large-messages.html"> |
| |
| <a href="large-messages.html"> |
| |
| |
| Large Messages |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.31" data-path="paging.html"> |
| |
| <a href="paging.html"> |
| |
| |
| Paging |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.32" data-path="queue-attributes.html"> |
| |
| <a href="queue-attributes.html"> |
| |
| |
| Queue Attributes |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.33" data-path="scheduled-messages.html"> |
| |
| <a href="scheduled-messages.html"> |
| |
| |
| Scheduled Messages |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.34" data-path="last-value-queues.html"> |
| |
| <a href="last-value-queues.html"> |
| |
| |
| Last-Value Queues |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.35" data-path="message-grouping.html"> |
| |
| <a href="message-grouping.html"> |
| |
| |
| Message Grouping |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.36" data-path="pre-acknowledge.html"> |
| |
| <a href="pre-acknowledge.html"> |
| |
| |
| Extra Acknowledge Modes |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.37" data-path="management.html"> |
| |
| <a href="management.html"> |
| |
| |
| Management |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.38" data-path="management-console.html"> |
| |
| <a href="management-console.html"> |
| |
| |
| Management Console |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.39" data-path="security.html"> |
| |
| <a href="security.html"> |
| |
| |
| Security |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.40" data-path="masking-passwords.html"> |
| |
| <a href="masking-passwords.html"> |
| |
| |
| Masking Passwords |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.41" data-path="broker-plugins.html"> |
| |
| <a href="broker-plugins.html"> |
| |
| |
| Broker Plugins |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.42" data-path="resource-limits.html"> |
| |
| <a href="resource-limits.html"> |
| |
| |
| Resource Limits |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.43" data-path="jms-bridge.html"> |
| |
| <a href="jms-bridge.html"> |
| |
| |
| The JMS Bridge |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.44" data-path="client-reconnection.html"> |
| |
| <a href="client-reconnection.html"> |
| |
| |
| Client Reconnection and Session Reattachment |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.45" data-path="diverts.html"> |
| |
| <a href="diverts.html"> |
| |
| |
| Diverting and Splitting Message Flows |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.46" data-path="core-bridges.html"> |
| |
| <a href="core-bridges.html"> |
| |
| |
| Core Bridges |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.47" data-path="duplicate-detection.html"> |
| |
| <a href="duplicate-detection.html"> |
| |
| |
| Duplicate Message Detection |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.48" data-path="clusters.html"> |
| |
| <a href="clusters.html"> |
| |
| |
| Clusters |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter active" data-level="1.49" data-path="ha.html"> |
| |
| <a href="ha.html"> |
| |
| |
| High Availability and Failover |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.50" data-path="graceful-shutdown.html"> |
| |
| <a href="graceful-shutdown.html"> |
| |
| |
| Graceful Server Shutdown |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.51" data-path="libaio.html"> |
| |
| <a href="libaio.html"> |
| |
| |
| Libaio Native Libraries |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.52" data-path="thread-pooling.html"> |
| |
| <a href="thread-pooling.html"> |
| |
| |
| Thread management |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.53" data-path="logging.html"> |
| |
| <a href="logging.html"> |
| |
| |
| Logging |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.54" data-path="rest.html"> |
| |
| <a href="rest.html"> |
| |
| |
| REST Interface |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.55" data-path="embedding-activemq.html"> |
| |
| <a href="embedding-activemq.html"> |
| |
| |
| Embedding Apache ActiveMQ Artemis |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.56" data-path="karaf.html"> |
| |
| <a href="karaf.html"> |
| |
| |
| Apache Karaf |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.57" data-path="spring-integration.html"> |
| |
| <a href="spring-integration.html"> |
| |
| |
| Spring Integration |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.58" data-path="cdi-integration.html"> |
| |
| <a href="cdi-integration.html"> |
| |
| |
| CDI Integration |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.59" data-path="intercepting-operations.html"> |
| |
| <a href="intercepting-operations.html"> |
| |
| |
| Intercepting Operations |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.60" data-path="protocols-interoperability.html"> |
| |
| <a href="protocols-interoperability.html"> |
| |
| |
| Protocols and Interoperability |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.61" data-path="tools.html"> |
| |
| <a href="tools.html"> |
| |
| |
| Tools |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.62" data-path="maven-plugin.html"> |
| |
| <a href="maven-plugin.html"> |
| |
| |
| Maven Plugin |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.63" data-path="unit-testing.html"> |
| |
| <a href="unit-testing.html"> |
| |
| |
| Unit Testing |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.64" data-path="perf-tuning.html"> |
| |
| <a href="perf-tuning.html"> |
| |
| |
| Troubleshooting and Performance Tuning |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.65" data-path="configuration-index.html"> |
| |
| <a href="configuration-index.html"> |
| |
| |
| Configuration Reference |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| <li class="chapter " data-level="1.66" data-path="updating-artemis.html"> |
| |
| <a href="updating-artemis.html"> |
| |
| |
| Updating Artemis |
| |
| </a> |
| |
| |
| |
| </li> |
| |
| |
| |
| |
| <li class="divider"></li> |
| |
| <li> |
| <a href="https://www.gitbook.com" target="blank" class="gitbook-link"> |
| Published with GitBook |
| </a> |
| </li> |
| </ul> |
| |
| |
| </nav> |
| |
| |
| </div> |
| |
| <div class="book-body"> |
| |
| <div class="body-inner"> |
| |
| |
| |
| <div class="book-header" role="navigation"> |
| |
| |
| <!-- Title --> |
| <h1> |
| <i class="fa fa-circle-o-notch fa-spin"></i> |
| <a href="." >High Availability and Failover</a> |
| </h1> |
| </div> |
| |
| |
| |
| |
| <div class="page-wrapper" tabindex="-1" role="main"> |
| <div class="page-inner"> |
| |
| <div id="book-search-results"> |
| <div class="search-noresults"> |
| |
| <section class="normal markdown-section"> |
| |
| <h1 id="high-availability-and-failover">High Availability and Failover</h1> |
| <p>We define high availability as the <em>ability for the system to continue |
| functioning after failure of one or more of the servers</em>.</p> |
| <p>A part of high availability is <em>failover</em> which we define as the |
| <em>ability for client connections to migrate from one server to another in |
| event of server failure so client applications can continue to operate</em>.</p> |
| <h2 id="live---backup-groups">Live - Backup Groups</h2> |
<p>Apache ActiveMQ Artemis allows servers to be linked together as <em>live - backup</em> groups
where each live server can have one or more backup servers. A backup
server is owned by only one live server. Backup servers are not
operational until failover occurs; however, one chosen backup, which will
be in passive mode, announces its status and waits to take over the live
server's work.</p>
<p>Before failover, only the live server is serving the Apache ActiveMQ Artemis clients
while the backup servers remain passive, waiting to become live. When a
live server crashes or is brought down in the correct
mode, the backup server currently in passive mode will become live and
another backup server will become passive. If a live server restarts
after a failover then it will have priority and be the next server to
become live when the current live server goes down; if the current live
server is configured to allow automatic failback then it will detect the
original live server coming back up and automatically stop.</p>
| <h3 id="ha-policies">HA Policies</h3> |
<p>Apache ActiveMQ Artemis supports two different strategies for backing up a server:
<em>shared store</em> and <em>replication</em>. These are configured via the
<code>ha-policy</code> configuration element.</p>
| <pre><code><ha-policy> |
| <replication/> |
| </ha-policy> |
| </code></pre><p>or</p> |
| <pre><code><ha-policy> |
| <shared-store/> |
| </ha-policy> |
</code></pre><p>As well as these two strategies there is also a third called <code>live-only</code>.
This means there will be no backup strategy; it is the default
if none is provided, and it is used to configure <code>scale-down</code>,
which we will cover later in this chapter.</p>
| <blockquote> |
| <p><strong>Note</strong></p> |
<p>The <code>ha-policy</code> configuration replaces any current HA configuration
in the root of the <code>broker.xml</code> configuration. All old
configuration is now deprecated, although best efforts will be made to
honour it if configured this way.</p>
| <p><strong>Note</strong></p> |
| <p>Only persistent message data will survive failover. Any non persistent |
| message data will not be available after failover.</p> |
| </blockquote> |
<p>The <code>ha-policy</code> type configures which strategy a cluster should use to
provide the backing up of a server's data. This configuration
element also determines how a server should behave within the cluster:
as a master (live), a slave (backup) or colocated (both live and
backup). This would look something like:</p>
| <pre><code><ha-policy> |
| <replication> |
| <master/> |
| </replication> |
| </ha-policy> |
| </code></pre><p>or</p> |
| <pre><code><ha-policy> |
<shared-store>
<slave/>
</shared-store>
| </ha-policy> |
| </code></pre><p>or</p> |
| <pre><code><ha-policy> |
| <replication> |
| <colocated/> |
| </replication> |
| </ha-policy> |
| </code></pre><h3 id="data-replication">Data Replication</h3> |
<p>When using replication, the live and the backup servers do not share the
same data directories; all data synchronization is done over the
network. Therefore all (persistent) data received by the live server
will be duplicated to the backup.</p>
| <p>Notice that upon start-up the backup server will first need to |
| synchronize all existing data from the live server before becoming |
| capable of replacing the live server should it fail. So unlike when |
| using shared storage, a replicating backup will not be a fully |
| operational backup right after start-up, but only after it finishes |
| synchronizing the data with its live server. The time it will take for |
| this to happen will depend on the amount of data to be synchronized and |
| the connection speed.</p> |
| <blockquote> |
| <p><strong>Note</strong></p> |
| <p>In general, synchronization occurs in parallel with current network traffic so |
| this won't cause any blocking on current clients. However, there is a critical |
| moment at the end of this process where the replicating server must complete |
| the synchronization and ensure the replica acknowledges this completion. This |
| exchange between the replicating server and replica will block any journal |
| related operations. The maximum length of time that this exchange will block |
| is controlled by the <code>initial-replication-sync-timeout</code> configuration element.</p> |
| </blockquote> |
<p>Replication will create a copy of the data at the backup. One issue to
be aware of is: in case of a successful fail-over, the backup's data
will be newer than the data in the live server's storage. If you configure your
live server to perform a failback when restarted, it will synchronize its data
with the backup's. If both servers are shutdown, the administrator will
have to determine which one has the latest data.</p>
| <p>The replicating live and backup pair must be part of a cluster. The |
| Cluster Connection also defines how backup servers will find the remote |
| live servers to pair with. Refer to <a href="clusters.html">Clusters</a> for details on how this is done, |
| and how to configure a cluster connection. Notice that:</p> |
| <ul> |
| <li><p>Both live and backup servers must be part of the same cluster. |
| Notice that even a simple live/backup replicating pair will require |
| a cluster configuration.</p> |
| </li> |
| <li><p>Their cluster user and password must match.</p> |
| </li> |
| </ul> |
| <p>Within a cluster, there are two ways that a backup server will locate a |
| live server to replicate from, these are:</p> |
| <ul> |
<li><p><code>specifying a node group</code>. You can specify a group of live servers
that a backup server can connect to. This is done by configuring
<code>group-name</code> in either the <code>master</code> or the <code>slave</code> element of the
<code>broker.xml</code>. A backup server will only connect to a
live server that shares the same node group name (a configuration sketch follows the note below).</p>
| </li> |
<li><p><code>connecting to any live</code>. This will be the behaviour if <code>group-name</code>
is not configured, allowing a backup server to connect to any live
server.</p>
| </li> |
| </ul> |
| <blockquote> |
| <p><strong>Note</strong></p> |
| <p>A <code>group-name</code> example: suppose you have 5 live servers and 6 backup |
| servers:</p> |
| <ul> |
| <li><p><code>live1</code>, <code>live2</code>, <code>live3</code>: with <code>group-name=fish</code></p> |
| </li> |
| <li><p><code>live4</code>, <code>live5</code>: with <code>group-name=bird</code></p> |
| </li> |
| <li><p><code>backup1</code>, <code>backup2</code>, <code>backup3</code>, <code>backup4</code>: with <code>group-name=fish</code></p> |
| </li> |
| <li><p><code>backup5</code>, <code>backup6</code>: with <code>group-name=bird</code></p> |
| </li> |
| </ul> |
| <p>After joining the cluster the backups with <code>group-name=fish</code> will |
| search for live servers with <code>group-name=fish</code> to pair with. Since |
| there is one backup too many, the <code>fish</code> will remain with one spare |
| backup.</p> |
| <p>The 2 backups with <code>group-name=bird</code> (<code>backup5</code> and <code>backup6</code>) will |
| pair with live servers <code>live4</code> and <code>live5</code>.</p> |
| </blockquote> |
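<p>As a minimal configuration sketch (reusing the illustrative group name <code>fish</code>
from the note above), the live server would set the group name in its <code>master</code>
element:</p>
<pre><code><ha-policy>
   <replication>
      <master>
         <group-name>fish</group-name>
      </master>
   </replication>
</ha-policy>
</code></pre><p>and a matching backup would set the same name in its <code>slave</code> element:</p>
<pre><code><ha-policy>
   <replication>
      <slave>
         <group-name>fish</group-name>
      </slave>
   </replication>
</ha-policy>
</code></pre>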
| <p>The backup will search for any live server that it is configured to |
| connect to. It then tries to replicate with each live server in turn |
| until it finds a live server that has no current backup configured. If |
| no live server is available it will wait until the cluster topology |
| changes and repeats the process.</p> |
| <blockquote> |
| <p><strong>Note</strong></p> |
<p>This is an important distinction from a shared-store backup: if a
backup starts and does not find a live server, the server will just
activate and start to serve client requests. In the replication case,
the backup just keeps waiting for a live server to pair with. Note
that in replication the backup server does not know whether any data
it might have is up to date, so it really cannot decide to activate
automatically. To activate a replicating backup server using the data
it has, the administrator must change its configuration to make it a
live server by changing <code>slave</code> to <code>master</code>.</p>
| </blockquote> |
<p>Much like in the shared-store case, when the live server stops or
crashes, its replicating backup will become active and take over its
duties. Specifically, the backup will become active when it loses
connection to its live server. This can be problematic because it can
also happen because of a temporary network problem. In order to address
this issue, the backup will try to determine whether it still can
connect to the other servers in the cluster. If it can connect to more
than half the servers, it will become active; if more than half the
servers also disappeared with the live, the backup will wait and try
reconnecting with the live. This avoids a split-brain situation.</p>
| <h4 id="configuration">Configuration</h4> |
<p>To configure the live and backup servers to be a replicating pair,
configure the live server in <code>broker.xml</code> to have:</p>
| <pre><code><ha-policy> |
| <replication> |
| <master/> |
| </replication> |
| </ha-policy> |
| . |
| <cluster-connections> |
| <cluster-connection name="my-cluster"> |
| ... |
| </cluster-connection> |
| </cluster-connections> |
</code></pre><p>The backup server must be similarly configured but as a <code>slave</code>:</p>
| <pre><code><ha-policy> |
| <replication> |
| <slave/> |
| </replication> |
| </ha-policy> |
| </code></pre><h4 id="all-replication-configuration">All Replication Configuration</h4> |
| <p>The following table lists all the <code>ha-policy</code> configuration elements for |
| HA strategy Replication for <code>master</code>:</p> |
| <table summary="HA Replication Master Policy" border="1"> |
| <colgroup> |
| <col> |
| <col> |
| </colgroup> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>`check-for-live-server`</td> |
| <td>Whether to check the cluster for a (live) server using our own server ID |
| when starting up. This option is only necessary for performing 'fail-back' |
| on replicating servers.</td> |
| </tr> |
| <tr> |
| <td>`cluster-name`</td> |
| <td>Name of the cluster configuration to use for replication. This setting is |
| only necessary if you configure multiple cluster connections. If configured then |
| the connector configuration of the cluster configuration with this name will be |
| used when connecting to the cluster to discover if a live server is already running, |
| see `check-for-live-server`. If unset then the default cluster connections configuration |
| is used (the first one configured).</td> |
| </tr> |
| <tr> |
| <td>`group-name`</td> |
| <td>If set, backup servers will only pair with live servers with matching group-name.</td> |
| </tr> |
| <tr> |
| <td>`initial-replication-sync-timeout`</td> |
| <td>The amount of time the replicating server will wait at the completion of the initial |
| replication process for the replica to acknowledge it has received all the necessary |
| data. The default is 30,000 milliseconds. <strong>Note</strong>: during this interval any |
| journal related operations will be blocked.</td> |
| </tr> |
| </tbody> |
| </table> |
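<p>As an illustrative sketch only (the element names are taken from the table above;
the values are hypothetical), a <code>master</code> combining these elements might look
like:</p>
<pre><code><ha-policy>
   <replication>
      <master>
         <check-for-live-server>true</check-for-live-server>
         <cluster-name>my-cluster</cluster-name>
         <group-name>fish</group-name>
         <initial-replication-sync-timeout>30000</initial-replication-sync-timeout>
      </master>
   </replication>
</ha-policy>
</code></pre>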
| |
| <p>The following table lists all the <code>ha-policy</code> configuration elements for |
| HA strategy Replication for <code>slave</code>:</p> |
| <table summary="HA Replication Slave Policy" border="1"> |
| <colgroup> |
| <col> |
| <col> |
| </colgroup> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>`cluster-name`</td> |
| <td>Name of the cluster configuration to use for replication. |
| This setting is only necessary if you configure multiple cluster |
| connections. If configured then the connector configuration of |
| the cluster configuration with this name will be used when |
| connecting to the cluster to discover if a live server is already |
| running, see `check-for-live-server`. If unset then the default |
| cluster connections configuration is used (the first one configured)</td> |
| </tr> |
| <tr> |
| <td>`group-name`</td> |
| <td>If set, backup servers will only pair with live servers with matching group-name</td> |
| </tr> |
| <tr> |
| <td>`max-saved-replicated-journals-size`</td> |
<td>This specifies how many times a replicated backup server
can restart after moving its files on start. Once there are
this many backup journal files the server will stop permanently
after it fails back.</td>
| </tr> |
| <tr> |
| <td>`allow-failback`</td> |
<td>Whether a server will automatically stop when another server places a
request to take over its place. The use case is when the backup has
failed over.</td>
| </tr> |
| <tr> |
| <td>`initial-replication-sync-timeout`</td> |
| <td>After failover and the slave has become live, this is |
| set on the new live server. It represents the amount of time |
| the replicating server will wait at the completion of the |
| initial replication process for the replica to acknowledge |
| it has received all the necessary data. The default is |
| 30,000 milliseconds. <strong>Note</strong>: during this interval any |
| journal related operations will be blocked.</td> |
| </tr> |
| </tbody> |
| </table> |
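<p>Again as a sketch (the values are hypothetical; the element names come from the
table above), a <code>slave</code> using these elements might be configured as:</p>
<pre><code><ha-policy>
   <replication>
      <slave>
         <cluster-name>my-cluster</cluster-name>
         <group-name>fish</group-name>
         <max-saved-replicated-journals-size>2</max-saved-replicated-journals-size>
         <allow-failback>true</allow-failback>
         <initial-replication-sync-timeout>30000</initial-replication-sync-timeout>
      </slave>
   </replication>
</ha-policy>
</code></pre>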
| |
| <h3 id="shared-store">Shared Store</h3> |
<p>When using a shared store, both live and backup servers share the <em>same</em>
entire data directory using a shared file system. This means the paging
directory, journal directory, large messages and bindings journal.</p>
| <p>When failover occurs and a backup server takes over, it will load the |
| persistent storage from the shared file system and clients can connect |
| to it.</p> |
| <p>This style of high availability differs from data replication in that it |
| requires a shared file system which is accessible by both the live and |
| backup nodes. Typically this will be some kind of high performance |
| Storage Area Network (SAN). We do not recommend you use Network Attached |
| Storage (NAS), e.g. NFS mounts to store any shared journal (NFS is |
| slow).</p> |
<p>The advantage of shared-store high availability is that no replication
occurs between the live and backup nodes; this means it does not suffer
any performance penalties due to the overhead of replication during
normal operation.</p>
<p>The disadvantage of the shared store approach is that it requires a
shared file system, and when the backup server activates it needs to
load the journal from the shared store, which can take some time
depending on the amount of data in the store.</p>
<p>If you require the highest performance during normal operation, have
access to a fast SAN, and can live with a slightly slower failover (depending
on the amount of data), we recommend shared store high availability.</p>
| <p><img src="images/ha-shared-store.png" alt="ActiveMQ Artemis ha-shared-store.png"></p> |
| <h4 id="configuration">Configuration</h4> |
<p>To configure the live and backup servers to share their store, configure
it via the <code>ha-policy</code> configuration in <code>broker.xml</code>:</p>
| <pre><code><ha-policy> |
| <shared-store> |
| <master/> |
| </shared-store> |
| </ha-policy> |
| . |
| <cluster-connections> |
| <cluster-connection name="my-cluster"> |
| ... |
| </cluster-connection> |
| </cluster-connections> |
| </code></pre><p>The backup server must also be configured as a backup.</p> |
| <pre><code><ha-policy> |
| <shared-store> |
| <slave/> |
| </shared-store> |
| </ha-policy> |
</code></pre><p>In order for live - backup groups to operate properly with a shared
store, both servers must have configured the location of the journal
directory to point to the <em>same shared location</em> (as explained in <a href="persistence.html">Configuring the message journal</a>).</p>
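<p>For example, both the <code>master</code> and the <code>slave</code> might point the
standard directory settings (described in <a href="persistence.html">Persistence</a>) at the
same shared mount; the paths below are purely illustrative:</p>
<pre><code><paging-directory>/mnt/shared/broker/paging</paging-directory>
<bindings-directory>/mnt/shared/broker/bindings</bindings-directory>
<journal-directory>/mnt/shared/broker/journal</journal-directory>
<large-messages-directory>/mnt/shared/broker/large-messages</large-messages-directory>
</code></pre>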
<p>Also each node, live and backup, will need to have a cluster connection
defined even if not part of a cluster. The Cluster Connection info
defines how backup servers announce their presence to their live server or
any other nodes in the cluster. Refer to <a href="clusters.html">Clusters</a> for details on how this is
done.</p>
| <h3 id="failing-back-to-live-server">Failing Back to live Server</h3> |
<p>After a live server has failed and a backup has taken over its
duties, you may want to restart the live server and have clients fail
back.</p>
<p>In case of "shared disk", simply restart the original live server and
kill the new live server. You can do this by killing the process itself.
Alternatively you can set <code>allow-failback</code> to <code>true</code> on the slave
config, which will force the backup that has become live to automatically
stop. This configuration would look like:</p>
| <pre><code><ha-policy> |
| <shared-store> |
| <slave> |
| <allow-failback>true</allow-failback> |
| </slave> |
| </shared-store> |
| </ha-policy> |
</code></pre><p>In replication HA mode you need to set an extra property
<code>check-for-live-server</code> to <code>true</code> in the <code>master</code> configuration. If set
to true, during start-up a live server will first search the cluster for
another server using its nodeID. If it finds one, it will contact this
server and try to "fail-back". Since this is a remote replication
scenario, the "starting live" will have to synchronize its data with the
server running with its ID; once they are in sync, it will request the
other server (which it assumes is a backup that has assumed its duties)
to shut down so it can take over. This is necessary because otherwise the
live server has no means of knowing whether there was a fail-over, and
if there was, whether the server that took over its duties is still running.
To enable this option, configure your <code>broker.xml</code>
file as follows:</p>
| <pre><code><ha-policy> |
| <replication> |
| <master> |
| <check-for-live-server>true</check-for-live-server> |
</master>
| </replication> |
| </ha-policy> |
| </code></pre><blockquote> |
| <p><strong>Warning</strong></p> |
<p>Be aware that if you restart a live server after failover has
occurred then <code>check-for-live-server</code> must be set to <code>true</code>. If not, the live server
will restart and serve the same messages that the backup has already
handled, causing duplicates.</p>
| </blockquote> |
<p>It is also possible, in the case of shared store, to cause failover to
occur on normal server shutdown. To enable this, set the following
property to true in the <code>ha-policy</code> configuration on either the <code>master</code>
or <code>slave</code> like so:</p>
| <pre><code><ha-policy> |
| <shared-store> |
| <master> |
| <failover-on-shutdown>true</failover-on-shutdown> |
| </master> |
| </shared-store> |
| </ha-policy> |
</code></pre><p>By default this is set to false. If you have set it to false
but still want to stop the server normally and cause failover, then
you can do this by using the management API as explained at <a href="management.html">Management</a>.</p>
<p>You can also force the running live server to shut down when the old live
server comes back up, allowing the original live server to take over
automatically, by setting the following property in the
<code>broker.xml</code> configuration file:</p>
| <pre><code><ha-policy> |
| <shared-store> |
| <slave> |
| <allow-failback>true</allow-failback> |
| </slave> |
| </shared-store> |
| </ha-policy> |
| </code></pre><h4 id="all-shared-store-configuration">All Shared Store Configuration</h4> |
| <p>The following table lists all the <code>ha-policy</code> configuration elements for |
| HA strategy shared store for <code>master</code>:</p> |
| <table summary="HA Shared Store Master Policy" border="1"> |
| <colgroup> |
| <col> |
| <col> |
| </colgroup> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
<td>`failover-on-shutdown`</td>
<td>If set to true then when this server is stopped
normally the backup will become live, assuming failover.
If false then the backup server will remain passive.
Note that if this is false and you want failover to occur,
you can use the management API as explained at <a href="management.html">Management</a>.</td>
| </tr> |
| <tr> |
| <td>`wait-for-activation`</td> |
| <td>If set to true then server startup will wait until it is activated. |
| If set to false then server startup will be done in the background. |
| Default is true.</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <p>The following table lists all the <code>ha-policy</code> configuration elements for |
| HA strategy Shared Store for <code>slave</code>:</p> |
| <table summary="HA Shared Store Slave Policy" border="1"> |
| <colgroup> |
| <col> |
| <col> |
| </colgroup> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
<td>`failover-on-shutdown`</td>
<td>In the case of a backup that has become live: when
set to true then when this server is stopped normally
the backup will become live, assuming failover. If false then
the backup server will remain passive. Note that if this is false
and you want failover to occur, you can use the management
API as explained at <a href="management.html">Management</a>.</td>
| </tr> |
| <tr> |
| <td>`allow-failback`</td> |
<td>Whether a server will automatically stop when another server
places a request to take over its place. The use case is
when the backup has failed over.</td>
| </tr> |
| </tbody> |
| </table> |
| |
| <h4 id="colocated-backup-servers">Colocated Backup Servers</h4> |
<p>It is also possible when running standalone to colocate backup servers
in the same JVM as another live server. Live servers can be configured
to request another live server in the cluster to start a backup server
in the same JVM, either using shared store or replication. The new backup
server will inherit its configuration from the live server creating it,
apart from its name, which will be set to <code>colocated_backup_n</code> where n
is the number of backups the server has created, and any directories and
its Connectors and Acceptors, which are discussed later on in this
chapter. A live server can also be configured to allow requests from
backups and to limit how many backups a live server can start. This way you
can evenly distribute backups around the cluster. This is configured via
the <code>ha-policy</code> element in the <code>broker.xml</code> file like
so:</p>
| <pre><code><ha-policy> |
| <replication> |
| <colocated> |
| <request-backup>true</request-backup> |
| <max-backups>1</max-backups> |
| <backup-request-retries>-1</backup-request-retries> |
| <backup-request-retry-interval>5000</backup-request-retry-interval> |
| <master/> |
| <slave/> |
| </colocated> |
</replication>
| </ha-policy> |
</code></pre><p>The above example is configured to use replication; in this case the
<code>master</code> and <code>slave</code> configurations must match those for normal
replication as in the previous section. <code>shared-store</code> is also supported.</p>
| <p><img src="images/ha-colocated.png" alt="ActiveMQ Artemis ha-colocated.png"></p> |
| <h4 id="configuring-connectors-and-acceptors">Configuring Connectors and Acceptors</h4> |
<p>If the HA Policy is colocated then connectors and acceptors will be
inherited from the live server creating it and offset depending on the
setting of the <code>backup-port-offset</code> configuration element. If this is set to,
say, 100 (which is the default) and a connector is using port 61616 then
this will be set to 61716 for the first server created, 61816 for the
second, and so on.</p>
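<p>A sketch of setting this explicitly inside a colocated policy (the value shown
is the default):</p>
<pre><code><ha-policy>
   <replication>
      <colocated>
         <backup-port-offset>100</backup-port-offset>
         ...
      </colocated>
   </replication>
</ha-policy>
</code></pre>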
| <blockquote> |
| <p><strong>Note</strong></p> |
<p>For INVM connectors and acceptors the id will have
<code>colocated_backup_n</code> appended, where n is the backup server number.</p>
| </blockquote> |
| <h4 id="remote-connectors">Remote Connectors</h4> |
<p>It may be that some of the Connectors configured are for external
servers and hence should be excluded from the offset, for instance a
Connector used by the cluster connection to do quorum voting for a
replicated backup server. These can be omitted from being offset by
adding them to the <code>ha-policy</code> configuration like so:</p>
| <pre><code><ha-policy> |
| <replication> |
| <colocated> |
| <excludes> |
| <connector-ref>remote-connector</connector-ref> |
| </excludes> |
| ......... |
| </ha-policy> |
| </code></pre><h4 id="configuring-directories">Configuring Directories</h4> |
<p>Directories for the Journal, Large messages and Paging will be set
according to what the HA strategy is. If shared store then the requesting
server will notify the target server of which directories to use. If
replication is configured then directories will be inherited from the
creating server but have the new backup's name appended.</p>
| <p>The following table lists all the <code>ha-policy</code> configuration elements for colocated policy:</p> |
| <table summary="HA Replication Colocation Policy" border="1"> |
| <colgroup> |
| <col> |
| <col> |
| </colgroup> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>`request-backup`</td> |
| <td>If true then the server will request a backup on another node</td> |
| </tr> |
| <tr> |
| <td>`backup-request-retries`</td> |
<td>How many times the live server will try to request a backup; -1 means forever.</td>
| </tr> |
| <tr> |
| <td>`backup-request-retry-interval`</td> |
<td>How long to wait between retry attempts when requesting a backup server.</td>
| </tr> |
| <tr> |
| <td>`max-backups`</td> |
| <td>How many backups a live server can create</td> |
| </tr> |
| <tr> |
| <td>`backup-port-offset`</td> |
| <td>The offset to use for the Connectors and Acceptors when creating a new backup server.</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <h3 id="scaling-down">Scaling Down</h3> |
<p>An alternative to using live/backup groups is to configure scale-down.
When configured for scale-down, a server can copy all its messages and
transaction state to another live server. The advantage of this is that
you don't need full backups to provide some form of HA; however there are
disadvantages with this approach, the first being that it only deals with
a server being stopped and not a server crash. The caveat here is if you
configure a backup to scale down.</p>
<p>Another disadvantage is that it is possible to lose message ordering.
This happens in the following scenario: say you have 2 live servers and
messages are distributed evenly between the servers from a single
producer. If one of the servers scales down then the messages sent back
to the other server will be in the queue after the ones already there,
so server 1 could have messages 1,3,5,7,9 and server 2 would have
2,4,6,8,10; if server 2 scales down the order in server 1 would be
1,3,5,7,9,2,4,6,8,10.</p>
| <p><img src="images/ha-scaledown.png" alt="ActiveMQ Artemis ha-scaledown.png"></p> |
| <p>The configuration for a live server to scale down would be something |
| like:</p> |
| <pre><code><ha-policy> |
| <live-only> |
| <scale-down> |
| <connectors> |
| <connector-ref>server1-connector</connector-ref> |
| </connectors> |
| </scale-down> |
| </live-only> |
| </ha-policy> |
</code></pre><p>In this instance the server is configured to use a specific connector to
scale down. If a connector is not specified then the first INVM
connector is chosen; this is to make scale down from a backup server
easy to configure. It is also possible to use discovery to scale down;
this would look like:</p>
| <pre><code><ha-policy> |
| <live-only> |
| <scale-down> |
| <discovery-group-ref discovery-group-name="my-discovery-group"/> |
| </scale-down> |
| </live-only> |
| </ha-policy> |
| </code></pre><h4 id="scale-down-with-groups">Scale Down with groups</h4> |
| <p>It is also possible to configure servers to only scale down to servers |
| that belong in the same group. This is done by configuring the group |
| like so:</p> |
| <pre><code><ha-policy> |
| <live-only> |
| <scale-down> |
| ... |
| <group-name>my-group</group-name> |
| </scale-down> |
| </live-only> |
| </ha-policy> |
</code></pre><p>In this scenario only servers that belong to the group <code>my-group</code> will
be scaled down to.</p>
| <h4 id="scale-down-and-backups">Scale Down and Backups</h4> |
<p>It is also possible to mix scale-down with HA via backup servers. If a
slave is configured to scale down then after failover has occurred,
instead of starting fully, the backup server will immediately scale down
to another live server. The most appropriate configuration for this is
using the <code>colocated</code> approach. It means as you bring up live servers
they will automatically be backed up, and as live servers are
shut down, their messages are made available on another live server. A
typical configuration would look like:</p>
| <pre><code><ha-policy> |
| <replication> |
| <colocated> |
| <backup-request-retries>44</backup-request-retries> |
| <backup-request-retry-interval>33</backup-request-retry-interval> |
| <max-backups>3</max-backups> |
| <request-backup>false</request-backup> |
| <backup-port-offset>33</backup-port-offset> |
| <master> |
| <group-name>purple</group-name> |
| <check-for-live-server>true</check-for-live-server> |
| <cluster-name>abcdefg</cluster-name> |
| </master> |
| <slave> |
| <group-name>tiddles</group-name> |
| <max-saved-replicated-journals-size>22</max-saved-replicated-journals-size> |
| <cluster-name>33rrrrr</cluster-name> |
| <restart-backup>false</restart-backup> |
| <scale-down> |
| <!--a grouping of servers that can be scaled down to--> |
| <group-name>boo!</group-name> |
| <!--either a discovery group--> |
| <discovery-group-ref discovery-group-name="wahey"/> |
| </scale-down> |
| </slave> |
| </colocated> |
| </replication> |
| </ha-policy> |
| </code></pre><h4 id="scale-down-and-clients">Scale Down and Clients</h4> |
<p>When a server is stopping and preparing to scale down it will send a
message to all its clients informing them which server it is scaling
down to before disconnecting them. At this point the client will
reconnect; however this will only succeed once the server has completed
the scale-down. This is to ensure that any state such as queues or
transactions are there for the client when it reconnects. The normal
reconnect settings apply when the client is reconnecting, so these should
be high enough to deal with the time needed to scale down.</p>
| <h2 id="failover-modes">Failover Modes</h2> |
| <p>Apache ActiveMQ Artemis defines two types of client failover:</p> |
| <ul> |
| <li><p>Automatic client failover</p> |
| </li> |
| <li><p>Application-level client failover</p> |
| </li> |
| </ul> |
| <p>Apache ActiveMQ Artemis also provides 100% transparent automatic reattachment of |
| connections to the same server (e.g. in case of transient network |
| problems). This is similar to failover, except it is reconnecting to the |
same server and is discussed in <a href="client-reconnection.html">Client Reconnection and Session Reattachment</a>.</p>
| <p>During failover, if the client has consumers on any non persistent or |
| temporary queues, those queues will be automatically recreated during |
| failover on the backup node, since the backup node will not have any |
| knowledge of non persistent queues.</p> |
| <h3 id="automatic-client-failover">Automatic Client Failover</h3> |
| <p>Apache ActiveMQ Artemis clients can be configured to receive knowledge of all live and |
| backup servers, so that in event of connection failure at the client - |
| live server connection, the client will detect this and reconnect to the |
| backup server. The backup server will then automatically recreate any |
| sessions and consumers that existed on each connection before failover, |
| thus saving the user from having to hand-code manual reconnection logic.</p> |
<p>An Apache ActiveMQ Artemis client detects connection failure when it has not received
packets from the server within the time given by
<code>client-failure-check-period</code> as explained in section <a href="connection-ttl.html">Detecting Dead Connections</a>. If the client
does not receive data in good time, it will assume the connection has
failed and attempt failover. Also if the socket is closed by the OS,
usually if the server process is killed rather than the machine itself
crashing, then the client will failover straight away.</p>
<p>Apache ActiveMQ Artemis clients can be configured to discover the list of live-backup
server groups in a number of different ways. They can be configured
explicitly, or, probably the most common way of doing this, they can use
<em>server discovery</em> to discover the list automatically.
For full details on how to configure server discovery, please see <a href="clusters.html">Clusters</a>.
Alternatively, the clients can explicitly connect to a specific server
and download the current servers and backups; see <a href="clusters.html">Clusters</a>.</p>
| <p>To enable automatic client failover, the client must be configured to |
| allow non-zero reconnection attempts (as explained in <a href="client-reconnection.html">Client Reconnection and Session Reattachment</a>).</p> |
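<p>As a sketch, a client connection URL enabling HA and unlimited reconnection
attempts might look like the following (the host names here are hypothetical):</p>
<pre><code>(tcp://live-host:61616,tcp://backup-host:61616)?ha=true&reconnectAttempts=-1
</code></pre>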
| <p>By default failover will only occur after at least one connection has |
| been made to the live server. In other words, by default, failover will |
| not occur if the client fails to make an initial connection to the live |
| server - in this case it will simply retry connecting to the live server |
| according to the reconnect-attempts property and fail after this number |
| of attempts.</p> |
| <h4 id="failing-over-on-the-initial-connection">Failing over on the Initial Connection</h4> |
| <p>Since the client does not learn about the full topology until after the |
| first connection is made there is a window where it does not know about |
| the backup. If a failure happens at this point the client can only try |
| reconnecting to the original live server. To configure how many attempts |
| the client will make you can set the URL parameter <code>initialConnectAttempts</code>. |
The default for this is <code>0</code>, that is, try only once. Once the number of
attempts has been made, an exception will be thrown.</p>
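<p>For instance, to retry the initial connection three times before giving up, the
URL parameter could be set as in this sketch (host name hypothetical):</p>
<pre><code>tcp://live-host:61616?initialConnectAttempts=3
</code></pre>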
| <p>For examples of automatic failover with transacted and non-transacted |
| JMS sessions, please see <a href="examples.html">the examples</a> chapter.</p> |
| <h4 id="a-note-on-server-replication">A Note on Server Replication</h4> |
| <p>Apache ActiveMQ Artemis does not replicate full server state between live and backup |
| servers. When the new session is automatically recreated on the backup |
| it won't have any knowledge of messages already sent or acknowledged in |
| that session. Any in-flight sends or acknowledgements at the time of |
| failover might also be lost.</p> |
| <p>By replicating full server state, theoretically we could provide a 100% |
| transparent seamless failover, which would avoid any lost messages or |
| acknowledgements, however this comes at a great cost: replicating the |
| full server state (including the queues, session, etc.). This would |
| require replication of the entire server state machine; every operation |
on the live server would have to be replicated on the replica server(s) in
| the exact same global order to ensure a consistent replica state. This |
| is extremely hard to do in a performant and scalable way, especially |
| when one considers that multiple threads are changing the live server |
| state concurrently.</p> |
| <p>It is possible to provide full state machine replication using |
| techniques such as <em>virtual synchrony</em>, but this does not scale well and |
| effectively serializes all operations to a single thread, dramatically |
| reducing concurrency.</p> |
| <p>Other techniques for multi-threaded active replication exist such as |
| replicating lock states or replicating thread scheduling but this is |
| very hard to achieve at a Java level.</p> |
<p>Consequently it was decided that it was not worth massively reducing
performance and concurrency for the sake of 100% transparent failover.
Even without 100% transparent failover, it is simple to guarantee <em>once
and only once</em> delivery, even in the case of failure, by using a
combination of duplicate detection and retrying of transactions. However
this is not 100% transparent to the client code.</p>
| <h4 id="handling-blocking-calls-during-failover">Handling Blocking Calls During Failover</h4> |
| <p>If the client code is in a blocking call to the server, waiting for a |
| response to continue its execution, when failover occurs, the new |
| session will not have any knowledge of the call that was in progress. |
| This call might otherwise hang for ever, waiting for a response that |
| will never come.</p> |
<p>To prevent this, Apache ActiveMQ Artemis will unblock any blocking calls that were in
progress at the time of failover by making them throw a
<code>javax.jms.JMSException</code> (if using JMS), or an <code>ActiveMQException</code> with
error code <code>ActiveMQException.UNBLOCKED</code>. It is up to the client code to
catch this exception and retry any operations if desired.</p>
<p>If the method being unblocked is a call to commit(), or prepare(), then
the transaction will be automatically rolled back and Apache ActiveMQ Artemis will
throw a <code>javax.jms.TransactionRolledBackException</code> (if using JMS), or an
<code>ActiveMQException</code> with error code
<code>ActiveMQException.TRANSACTION_ROLLED_BACK</code> if using the core API.</p>
| <h4 id="handling-failover-with-transactions">Handling Failover With Transactions</h4> |
| <p>If the session is transactional and messages have already been sent or |
| acknowledged in the current transaction, then the server cannot be sure |
| that messages sent or acknowledgements have not been lost during the |
| failover.</p> |
<p>Consequently the transaction will be marked as rollback-only, and any
subsequent attempt to commit it will throw a
<code>javax.jms.TransactionRolledBackException</code> (if using JMS), or an
<code>ActiveMQException</code> with error code
<code>ActiveMQException.TRANSACTION_ROLLED_BACK</code> if using the core API.</p>
| <blockquote> |
| <p><strong>Warning</strong></p> |
<p>The caveat to this rule is when XA is used either via JMS or through
the core API. If two-phase commit is used and prepare has already been
called then rolling back could cause a <code>HeuristicMixedException</code>.
Because of this the commit will throw an <code>XAException.XA_RETRY</code>
exception. This informs the Transaction Manager that it should retry
the commit at some later point in time; a side effect of this is that
any non persistent messages will be lost. To avoid this use persistent
messages when using XA. With acknowledgements this is not an issue
since they are flushed to the server before prepare gets called.</p>
| </blockquote> |
| <p>It is up to the user to catch the exception, and perform any client side |
| local rollback code as necessary. There is no need to manually rollback |
| the session - it is already rolled back. The user can then just retry |
| the transactional operations again on the same session.</p> |
| <p>Apache ActiveMQ Artemis ships with a fully functioning example demonstrating how to do |
| this, please see <a href="examples.html">the examples</a> chapter.</p> |
| <p>If failover occurs when a commit call is being executed, the server, as |
| previously described, will unblock the call to prevent a hang, since no |
| response will come back. In this case it is not easy for the client to |
| determine whether the transaction commit was actually processed on the |
| live server before failure occurred.</p> |
| <blockquote> |
| <p><strong>Note</strong></p> |
| <p>If XA is being used either via JMS or through the core API then an |
| <code>XAException.XA_RETRY</code> is thrown. This is to inform Transaction |
| Managers that a retry should occur at some point. At some later point |
in time the Transaction Manager will retry the commit. If the original
commit has not occurred then it will still exist and be committed; if
it does not exist then it is assumed to have been committed, although
the transaction manager may log a warning.</p>
| </blockquote> |
| <p>To remedy this, the client can simply enable duplicate detection (<a href="duplicate-detection.html">Duplicate Message Detection</a>) in |
| the transaction, and retry the transaction operations again after the |
| call is unblocked. If the transaction had indeed been committed |
| successfully on the live server before failover, then when the transaction |
| is retried, duplicate detection will ensure that any durable messages |
| resent in the transaction are ignored on the server, preventing them from |
| being sent more than once.</p> |
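| <p>With JMS, for example, each message in the transaction can be given a |
| duplicate-detection ID via the <code>_AMQ_DUPL_ID</code> string property described in |
| the <a href="duplicate-detection.html">Duplicate Message Detection</a> chapter. A sketch, |
| with a hypothetical payload:</p> |
| <pre><code class="lang-java">// Sketch: assign each logical message a fixed duplicate-detection ID. |
| // Generate the ID once per message, not once per send attempt, so that a |
| // retried commit resends the same IDs and the server can drop duplicates. |
| TextMessage message = session.createTextMessage("some payload"); |
| message.setStringProperty("_AMQ_DUPL_ID", java.util.UUID.randomUUID().toString()); |
| producer.send(message); |
| </code></pre> |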
| <blockquote> |
| <p><strong>Note</strong></p> |
| <p>By catching the rollback exceptions and retrying, catching unblocked |
| calls and enabling duplicate detection, once-and-only-once delivery |
| guarantees can be provided for messages in the case of failure, |
| guaranteeing no loss or duplication of messages.</p> |
| </blockquote> |
| <h4 id="handling-failover-with-non-transactional-sessions">Handling Failover With Non Transactional Sessions</h4> |
| <p>If the session is non transactional, messages or acknowledgements can be |
| lost in the event of failover.</p> |
| <p>If you wish to provide <em>once and only once</em> delivery guarantees for non |
| transacted sessions too, enabled duplicate detection, and catch unblock |
| exceptions as described in <a href="ha.html">Handling Blocking Calls During Failover</a></p> |
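| <p>A sketch combining the two techniques for a non-transacted session, again |
| with a hypothetical payload:</p> |
| <pre><code class="lang-java">// Sketch: non-transacted send with duplicate detection plus a retry loop. |
| // The duplicate-detection ID stays fixed across retries of this message. |
| TextMessage message = session.createTextMessage("some payload"); |
| message.setStringProperty("_AMQ_DUPL_ID", java.util.UUID.randomUUID().toString()); |
| boolean sent = false; |
| while (!sent) { |
|    try { |
|       producer.send(message); // may be unblocked by failover |
|       sent = true; |
|    } catch (JMSException e) { |
|       // retry; duplicate detection makes the resend safe |
|    } |
| } |
| </code></pre> |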
| <h3 id="getting-notified-of-connection-failure">Getting Notified of Connection Failure</h3> |
| <p>JMS provides a standard mechanism for getting notified asynchronously of |
| connection failure: <code>javax.jms.ExceptionListener</code>. Please consult the JMS |
| javadoc or any good JMS tutorial for more information on how to use |
| this.</p> |
| <p>The Apache ActiveMQ Artemis core API also provides a similar feature in the form of the |
| class <code>org.apache.activemq.artemis.api.core.client.SessionFailureListener</code>.</p> |
| <p>Any <code>ExceptionListener</code> or <code>SessionFailureListener</code> instance will always be |
| called by ActiveMQ Artemis in the event of connection failure, <strong>irrespective</strong> of |
| whether the connection was successfully failed over, reconnected or |
| reattached. However, you can find out whether reconnect or reattach has |
| happened, either from the <code>failedOver</code> flag passed in to |
| <code>connectionFailed</code> on <code>SessionFailureListener</code>, or by inspecting the |
| error code on the <code>javax.jms.JMSException</code>, which will be one of the |
| following:</p> |
| <p>JMSException error codes:</p> |
| <table summary="JMSException error codes" border="1"> |
| <colgroup> |
| <col> |
| <col> |
| </colgroup> |
| <thead> |
| <tr> |
| <th>Error code</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>FAILOVER</td> |
| <td>Failover has occurred and we have successfully reattached or reconnected.</td> |
| </tr> |
| <tr> |
| <td>DISCONNECT</td> |
| <td>No failover has occurred and we are disconnected.</td> |
| </tr> |
| </tbody> |
| </table> |
| |
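| <p>As an illustrative sketch, a JMS <code>ExceptionListener</code> could branch on these |
| codes (the literal strings follow the table above; verify against the |
| javadoc for your version):</p> |
| <pre><code class="lang-java">// Sketch: distinguish a successful failover from a plain disconnect. |
| connection.setExceptionListener(new ExceptionListener() { |
|    @Override |
|    public void onException(JMSException e) { |
|       if ("FAILOVER".equals(e.getErrorCode())) { |
|          // failover occurred and we were reattached or reconnected |
|       } else if ("DISCONNECT".equals(e.getErrorCode())) { |
|          // no failover occurred and we are disconnected |
|       } |
|    } |
| }); |
| </code></pre> |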
| <h3 id="application-level-failover">Application-Level Failover</h3> |
| <p>In some cases you may not want automatic client failover, preferring |
| instead to handle any connection failure yourself and code your own manual |
| reconnection logic in your own failure handler. We define this as |
| <em>application-level</em> failover, since the failover is handled at the user |
| application level.</p> |
| <p>To implement application-level failover, if you're using JMS then you |
| need to set an <code>ExceptionListener</code> class on the JMS connection. The |
| <code>ExceptionListener</code> will be called by Apache ActiveMQ Artemis in the event that |
| connection failure is detected. In your <code>ExceptionListener</code>, you would |
| close your old JMS connections, potentially look up new connection |
| factory instances from JNDI and create new connections.</p> |
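| <p>A sketch of such a listener, assuming the factory is bound in JNDI under |
| the hypothetical name <code>"ConnectionFactory"</code>:</p> |
| <pre><code class="lang-java">// Sketch: application-level failover in an ExceptionListener. |
| // 'connection' is assumed to be an effectively final reference to the |
| // current JMS Connection. |
| connection.setExceptionListener(new ExceptionListener() { |
|    @Override |
|    public void onException(JMSException failure) { |
|       try { |
|          connection.close(); // discard the failed connection |
|          Context ctx = new InitialContext(); |
|          ConnectionFactory cf = (ConnectionFactory) ctx.lookup("ConnectionFactory"); |
|          Connection newConnection = cf.createConnection(); |
|          // ... recreate sessions, producers and consumers here ... |
|          newConnection.start(); |
|       } catch (Exception e) { |
|          // application-specific handling, e.g. back off and retry the lookup |
|       } |
|    } |
| }); |
| </code></pre> |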
| <p>For a working example of application-level failover, please see <a href="examples.html">the examples</a> chapter.</p> |
| <p>If you are using the core API, then the procedure is very similar: you |
| would set a <code>FailureListener</code> on the core <code>ClientSession</code> instances.</p> |
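| <p>A minimal sketch using the core API; the listener interface and the |
| <code>addFailureListener</code> method live in <code>org.apache.activemq.artemis.api.core.client</code> |
| (verify the exact signatures against the javadoc for your version):</p> |
| <pre><code class="lang-java">// Sketch: register a SessionFailureListener on a core ClientSession. |
| session.addFailureListener(new SessionFailureListener() { |
|    @Override |
|    public void beforeReconnect(ActiveMQException exception) { |
|       // called before the session is transparently reconnected |
|    } |
|    @Override |
|    public void connectionFailed(ActiveMQException exception, boolean failedOver) { |
|       if (failedOver) { |
|          // failover, reconnect or reattach succeeded |
|       } else { |
|          // the connection is gone and was not failed over |
|       } |
|    } |
|    @Override |
|    public void connectionFailed(ActiveMQException exception, boolean failedOver, |
|                                 String scaleDownTargetNodeID) { |
|       connectionFailed(exception, failedOver); |
|    } |
| }); |
| </code></pre> |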
| |
| |
| </section> |
| |
| </div> |
| <div class="search-results"> |
| <div class="has-results"> |
| |
| <h1 class="search-results-title"><span class='search-results-count'></span> results matching "<span class='search-query'></span>"</h1> |
| <ul class="search-results-list"></ul> |
| |
| </div> |
| <div class="no-results"> |
| |
| <h1 class="search-results-title">No results matching "<span class='search-query'></span>"</h1> |
| |
| </div> |
| </div> |
| </div> |
| |
| </div> |
| </div> |
| |
| </div> |
| |
| |
| |
| <a href="clusters.html" class="navigation navigation-prev " aria-label="Previous page: Clusters"> |
| <i class="fa fa-angle-left"></i> |
| </a> |
| |
| |
| <a href="graceful-shutdown.html" class="navigation navigation-next " aria-label="Next page: Graceful Server Shutdown"> |
| <i class="fa fa-angle-right"></i> |
| </a> |
| |
| |
| |
| </div> |
| |
| <script> |
| var gitbook = gitbook || []; |
| gitbook.push(function() { |
| gitbook.page.hasChanged({"page":{"title":"High Availability and Failover","level":"1.49","depth":1,"next":{"title":"Graceful Server Shutdown","level":"1.50","depth":1,"path":"graceful-shutdown.md","ref":"graceful-shutdown.md","articles":[]},"previous":{"title":"Clusters","level":"1.48","depth":1,"path":"clusters.md","ref":"clusters.md","articles":[]},"dir":"ltr"},"config":{"plugins":[],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"highlight":{},"search":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"fontsettings":{"theme":"white","family":"sans","size":2},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":false}},"github":"apache/activemq-artemis","theme":"default","githubHost":"https://github.com/","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"ActiveMQ Artemis Documentation","links":{"home":"http://activemq.apache.org/artemis","issues":"https://issues.apache.org/jira/browse/ARTEMIS","contribute":"http://activemq.apache.org/contributing.html"},"gitbook":"3.x.x","description":"ActiveMQ Artemis User Guide and Reference Documentation"},"file":{"path":"ha.md","mtime":"2017-11-01T05:40:43.522Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2017-11-01T05:47:39.664Z"},"basePath":".","book":{"language":""}}); |
| }); |
| </script> |
| </div> |
| |
| |
| <script src="gitbook/gitbook.js"></script> |
| <script src="gitbook/theme.js"></script> |
| |
| |
| <script src="gitbook/gitbook-plugin-search/search-engine.js"></script> |
| |
| |
| |
| <script src="gitbook/gitbook-plugin-search/search.js"></script> |
| |
| |
| |
| <script src="gitbook/gitbook-plugin-lunr/lunr.min.js"></script> |
| |
| |
| |
| <script src="gitbook/gitbook-plugin-lunr/search-lunr.js"></script> |
| |
| |
| |
| <script src="gitbook/gitbook-plugin-sharing/buttons.js"></script> |
| |
| |
| |
| <script src="gitbook/gitbook-plugin-fontsettings/fontsettings.js"></script> |
| |
| |
| |
| </body> |
| </html> |
| |