Fix flaky test_pending_range
patch by Andrés de la Peña; reviewed by Berenguer Blasi for CASSANDRA-16614
Co-authored-by: Andrés de la Peña <a.penya.garcia@gmail.com>
Co-authored-by: Bereng <berenguerblasi@gmail.com>
diff --git a/README.md b/README.md
index b7efc05..efefb21 100644
--- a/README.md
+++ b/README.md
@@ -78,6 +78,19 @@
The tests will use this directory by default, avoiding the need for any
environment variable (that still will have precedence if given though).
+To run a specific test file, class or individual test, you only have to
+pass its path as an argument:
+
+ pytest --cassandra-dir=~/path/to/cassandra pending_range_test.py
+ pytest --cassandra-dir=~/path/to/cassandra pending_range_test.py::TestPendingRangeMovements
+ pytest --cassandra-dir=~/path/to/cassandra pending_range_test.py::TestPendingRangeMovements::test_pending_range
+
+When adding a new test or modifying an existing one, it's always a good idea to
+run it several times to make sure it is stable. This can be easily done with the
+``--count`` option of pytest-repeat. For example, to run a test file 10 times:
+
+ pytest --count=10 --cassandra-dir=~/path/to/cassandra pending_range_test.py
+
Existing tests are probably the best place to start to look at how to write
tests.
diff --git a/pending_range_test.py b/pending_range_test.py
index 6371312..e643ad7 100644
--- a/pending_range_test.py
+++ b/pending_range_test.py
@@ -1,6 +1,7 @@
import logging
import pytest
import re
+import threading
from cassandra.query import SimpleStatement
@@ -23,7 +24,8 @@
cluster.set_log_level('DEBUG')
# Create 5 node cluster
- cluster.populate(5).start()
+ ring_delay_ms = 3_600_000 # 1 hour
+ cluster.populate(5).start(jvm_args=['-Dcassandra.ring_delay_ms={}'.format(ring_delay_ms)])
node1, node2 = cluster.nodelist()[0:2]
# Set up RF=3 keyspace
@@ -46,27 +48,30 @@
mark = node1.mark_log()
- # Move a node
- node1.nodetool('move {}'.format(token))
+ # Move a node without waiting for the response of nodetool, so we don't have to wait for ring_delay
+ threading.Thread(target=(lambda: node1.nodetool('move {}'.format(token)))).start()
# Watch the log so we know when the node is moving
node1.watch_log_for('Moving .* to {}'.format(token), timeout=10, from_mark=mark)
- node1.watch_log_for('Sleeping 30000 ms before start streaming/fetching ranges', timeout=10, from_mark=mark)
+ node1.watch_log_for('Sleeping {} ms before start streaming/fetching ranges'.format(ring_delay_ms),
+ timeout=10, from_mark=mark)
- if cluster.version() >= '2.2':
- if cluster.version() >= '4.0':
- node2.watch_log_for('127.0.0.1:7000 state MOVING', timeout=10, filename='debug.log')
+ # Watch the logs so we know when all the nodes see the status update to MOVING
+ for node in cluster.nodelist():
+ if cluster.version() >= '2.2':
+ if cluster.version() >= '4.0':
+ node.watch_log_for('127.0.0.1:7000 state MOVING', timeout=10, filename='debug.log')
+ else:
+ node.watch_log_for('127.0.0.1 state moving', timeout=10, filename='debug.log')
else:
- node2.watch_log_for('127.0.0.1 state moving', timeout=10, filename='debug.log')
- else:
- # 2.1 doesn't have debug.log, so we are logging at trace, and look
- # in the system.log file
- node2.watch_log_for('127.0.0.1 state moving', timeout=10, filename='system.log')
+ # 2.1 doesn't have debug.log, so we are logging at trace, and look
+ # in the system.log file
+ node.watch_log_for('127.0.0.1 state moving', timeout=10, filename='system.log')
# Once the node is MOVING, kill it immediately, let the other nodes notice
node1.stop(gently=False, wait_other_notice=True)
- # Verify other nodes believe this is Down/Moving
+ # Verify other nodes believe that the killed node is Down/Moving
out, _, _ = node2.nodetool('ring')
logger.debug("Nodetool Ring output: {}".format(out))
assert re.search('127\.0\.0\.1.*?Down.*?Moving', out) is not None
diff --git a/requirements.txt b/requirements.txt
index 8e7ac0a..cf618d5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,6 +14,7 @@
mock
pytest==3.6.4
pytest-timeout
+pytest-repeat
parse
pycodestyle
psutil