ZOOKEEPER-710. permanent ZSESSIONMOVED error after client app reconnects to zookeeper cluster (phunt via mahadev)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/zookeeper/branches/branch-3.2@925339 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index 87cde99..8127ae0 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,11 @@
+3.2 branch
+
+Backward compatible changes:
+
+BUGFIXES:
+ ZOOKEEPER-710. permanent ZSESSIONMOVED error after client app reconnects to
+ zookeeper cluster (phunt via mahadev)
+
Release 3.2.2 - 2009-12-11
Backward compatible changes:
diff --git a/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml b/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml
index 32ad4b5..529994a 100644
--- a/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml
+++ b/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml
@@ -479,12 +479,13 @@
which has be reestablished on a different server. The normal cause of this error is
a client that sends a request to a server, but the network packet gets delayed, so
the client times out and connects to a new server. When the delayed packet arrives at
- the first server, the old server detects that the session has moved and sends back
- the SESSION_MOVED error. Clients normally do not see this error since they do not read
+ the first server, the old server detects that the session has moved, and closes the
+ client connection. Clients normally do not see this error since they do not read
from those old connections. (Old connections are usually closed.) One situation in which this
- error can be seen is when two clients try to reestablish the same connection using
+ condition can be seen is when two clients try to reestablish the same connection using
a saved session id and password. One of the clients will reestablish the connection
- and the second client will get the SessionMovedException.</para>
+ and the second client will be disconnected (causing the pair to attempt to re-establish
+ it's connection/session indefinitely).</para>
</section>
diff --git a/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java b/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java
index 0669158..cfc8490 100644
--- a/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java
+++ b/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java
@@ -27,6 +27,7 @@
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.KeeperException.Code;
+import org.apache.zookeeper.KeeperException.SessionMovedException;
import org.apache.zookeeper.ZooDefs.OpCode;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Stat;
@@ -46,6 +47,7 @@
import org.apache.zookeeper.proto.SyncRequest;
import org.apache.zookeeper.proto.SyncResponse;
import org.apache.zookeeper.server.DataTree.ProcessTxnResult;
+import org.apache.zookeeper.server.NIOServerCnxn;
import org.apache.zookeeper.server.NIOServerCnxn.Factory;
import org.apache.zookeeper.server.ZooKeeperServer.ChangeRecord;
import org.apache.zookeeper.txn.CreateSessionTxn;
@@ -253,6 +255,17 @@
rsp = new GetChildrenResponse(children);
break;
}
+ } catch (SessionMovedException e) {
+ // session moved is a connection level error, we need to tear
+ // down the connection otw ZOOKEEPER-710 might happen
+ // ie client on slow follower starts to renew session, fails
+ // before this completes, then tries the fast follower (leader)
+ // and is successful, however the initial renew is then
+ // successfully fwd/processed by the leader and as a result
+ // the client and leader disagree on where the client is most
+ // recently attached (and therefore invalid SESSION MOVED generated)
+ ((NIOServerCnxn)request.cnxn).sendBuffer(NIOServerCnxn.closeConn);
+ return;
} catch (KeeperException e) {
err = e.code();
} catch (Exception e) {
diff --git a/src/java/main/org/apache/zookeeper/server/NIOServerCnxn.java b/src/java/main/org/apache/zookeeper/server/NIOServerCnxn.java
index 10bc102..86b06cd 100644
--- a/src/java/main/org/apache/zookeeper/server/NIOServerCnxn.java
+++ b/src/java/main/org/apache/zookeeper/server/NIOServerCnxn.java
@@ -351,20 +351,22 @@
void sendBuffer(ByteBuffer bb) {
try {
- // We check if write interest here because if it is NOT set, nothing
- // is queued, so
- // we can try to send the buffer right away without waking up the
- // selector
- if ((sk.interestOps() & SelectionKey.OP_WRITE) == 0) {
- try {
- sock.write(bb);
- } catch (IOException e) {
- // we are just doing best effort right now
+ if (bb != closeConn) {
+ // We check if write interest here because if it is NOT set, nothing
+ // is queued, so
+ // we can try to send the buffer right away without waking up the
+ // selector
+ if ((sk.interestOps() & SelectionKey.OP_WRITE) == 0) {
+ try {
+ sock.write(bb);
+ } catch (IOException e) {
+ // we are just doing best effort right now
+ }
}
- }
- // if there is nothing left to send, we are done
- if (bb.remaining() == 0) {
- return;
+ // if there is nothing left to send, we are done
+ if (bb.remaining() == 0) {
+ return;
+ }
}
synchronized (factory) {
sk.selector().wakeup();
diff --git a/src/java/test/org/apache/zookeeper/test/QuorumTest.java b/src/java/test/org/apache/zookeeper/test/QuorumTest.java
index 179d7c4..5fff53d 100644
--- a/src/java/test/org/apache/zookeeper/test/QuorumTest.java
+++ b/src/java/test/org/apache/zookeeper/test/QuorumTest.java
@@ -32,6 +32,7 @@
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;
+import org.apache.zookeeper.Watcher.Event.KeeperState;
import org.apache.zookeeper.data.Stat;
import org.apache.zookeeper.server.quorum.FollowerHandler;
import org.apache.zookeeper.server.quorum.Leader;
@@ -165,14 +166,14 @@
@Test
public void testSessionMoved() throws IOException, InterruptedException, KeeperException {
String hostPorts[] = qb.hostPort.split(",");
- ZooKeeper zk = new DisconnectableZooKeeper(hostPorts[0], ClientBase.CONNECTION_TIMEOUT, new Watcher() {
+ DisconnectableZooKeeper zk = new DisconnectableZooKeeper(hostPorts[0], ClientBase.CONNECTION_TIMEOUT, new Watcher() {
public void process(WatchedEvent event) {
}});
zk.create("/sessionMoveTest", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
// we want to loop through the list twice
for(int i = 0; i < hostPorts.length*2; i++) {
// This should stomp the zk handle
- ZooKeeper zknew = new DisconnectableZooKeeper(hostPorts[(i+1)%hostPorts.length], ClientBase.CONNECTION_TIMEOUT,
+ DisconnectableZooKeeper zknew = new DisconnectableZooKeeper(hostPorts[(i+1)%hostPorts.length], ClientBase.CONNECTION_TIMEOUT,
new Watcher() {public void process(WatchedEvent event) {
}},
zk.getSessionId(),
@@ -181,14 +182,23 @@
try {
zk.setData("/", new byte[1], -1);
fail("Should have lost the connection");
- } catch(KeeperException.SessionMovedException e) {
+ } catch(KeeperException.ConnectionLossException e) {
}
- //zk.close();
+ zk.disconnect(); // close w/o closing session
zk = zknew;
}
zk.close();
}
+ private static class DiscoWatcher implements Watcher {
+ volatile boolean zkDisco = false;
+ public void process(WatchedEvent event) {
+ if (event.getState() == KeeperState.Disconnected) {
+ zkDisco = true;
+ }
+ }
+ }
+
@Test
/**
* Connect to two different servers with two different handles using the same session and
@@ -196,28 +206,37 @@
*/
public void testSessionMove() throws IOException, InterruptedException, KeeperException {
String hps[] = qb.hostPort.split(",");
- ZooKeeper zk = new DisconnectableZooKeeper(hps[0], ClientBase.CONNECTION_TIMEOUT, new Watcher() {
- public void process(WatchedEvent event) {
- }});
+ DiscoWatcher oldWatcher = new DiscoWatcher();
+ ZooKeeper zk = new DisconnectableZooKeeper(hps[0],
+ ClientBase.CONNECTION_TIMEOUT, oldWatcher);
zk.create("/t1", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
// This should stomp the zk handle
- ZooKeeper zknew = new DisconnectableZooKeeper(hps[1], ClientBase.CONNECTION_TIMEOUT, new Watcher() {
- public void process(WatchedEvent event) {
- }}, zk.getSessionId(), zk.getSessionPasswd());
+ DiscoWatcher watcher = new DiscoWatcher();
+ ZooKeeper zknew = new DisconnectableZooKeeper(hps[1],
+ ClientBase.CONNECTION_TIMEOUT, watcher, zk.getSessionId(),
+ zk.getSessionPasswd());
zknew.create("/t2", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
try {
zk.create("/t3", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
fail("Should have lost the connection");
- } catch(KeeperException.SessionMovedException e) {
+ } catch(KeeperException.ConnectionLossException e) {
+ // wait up to 30 seconds for the disco to be delivered
+ for (int i = 0; i < 30; i++) {
+ if (oldWatcher.zkDisco) {
+ break;
+ }
+ Thread.sleep(1000);
+ }
+ assertTrue(oldWatcher.zkDisco);
}
ArrayList<ZooKeeper> toClose = new ArrayList<ZooKeeper>();
toClose.add(zknew);
// Let's just make sure it can still move
for(int i = 0; i < 10; i++) {
- zknew = new DisconnectableZooKeeper(hps[1], ClientBase.CONNECTION_TIMEOUT, new Watcher() {
- public void process(WatchedEvent event) {
- }}, zk.getSessionId(), zk.getSessionPasswd());
+ zknew = new DisconnectableZooKeeper(hps[1],
+ ClientBase.CONNECTION_TIMEOUT, new DiscoWatcher(),
+ zk.getSessionId(), zk.getSessionPasswd());
toClose.add(zknew);
zknew.create("/t-"+i, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
}