Add dtest for repairing wide rows
patch by jasobrown, reviewed by Paulo Motta for CASSANDRA-13899
diff --git a/repair_tests/repair_test.py b/repair_tests/repair_test.py
index ad46d18..1d5bf8f 100644
--- a/repair_tests/repair_test.py
+++ b/repair_tests/repair_test.py
@@ -1,3 +1,5 @@
+import os
+import os.path
import threading
import time
import re
@@ -1061,6 +1063,24 @@
_, _, rc = node2.stress(['read', 'n=1M', 'no-warmup', '-rate', 'threads=30'], whitelist=True)
self.assertEqual(rc, 0)
+ @since('4.0')
+ def test_wide_row_repair(self):
+ """
+ @jira_ticket CASSANDRA-13899
+ Make sure compressed vs. uncompressed blocks are handled correctly during stream decompression
+ """
+ cluster = self.cluster
+ cluster.set_configuration_options(values={'hinted_handoff_enabled': False})
+ cluster.populate(2).start(wait_for_binary_proto=True)
+ node1, node2 = cluster.nodelist()
+ node2.stop(wait_other_notice=True)
+ profile_path = os.path.join(os.getcwd(), 'stress_profiles/repair_wide_rows.yaml')
+ print("yaml = " + profile_path)
+ node1.stress(['user', 'profile=' + profile_path, 'n=50', 'ops(insert=1)', 'no-warmup', '-rate', 'threads=8',
+ '-insert', 'visits=FIXED(100K)', 'revisit=FIXED(100K)'])
+ node2.start(wait_for_binary_proto=True)
+ node2.repair()
+
def test_dead_coordinator(self):
"""
@jira_ticket CASSANDRA-11824
diff --git a/stress_profiles/repair_wide_rows.yaml b/stress_profiles/repair_wide_rows.yaml
new file mode 100644
index 0000000..87f46f0
--- /dev/null
+++ b/stress_profiles/repair_wide_rows.yaml
@@ -0,0 +1,54 @@
+keyspace: stresscql
+keyspace_definition: |
+ CREATE KEYSPACE stresscql WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 2};
+
+table: typestest
+table_definition: |
+ CREATE TABLE typestest (
+ key text,
+ col1 text,
+ val blob,
+ PRIMARY KEY(key, col1)
+ )
+ WITH compaction = { 'class':'LeveledCompactionStrategy' }
+ AND compression = {'chunk_length_in_kb': '1', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'};
+
+#
+# Optional meta information on the generated columns in the above table
+# The min and max only apply to text and blob types
+# The distribution field represents the total unique population
+# distribution of that column across rows. Supported types are
+#
+# EXP(min..max) An exponential distribution over the range [min..max]
+# EXTREME(min..max,shape) An extreme value (Weibull) distribution over the range [min..max]
+# GAUSSIAN(min..max,stdvrng) A gaussian/normal distribution, where mean=(min+max)/2, and stdev is (mean-min)/stdvrng
+# GAUSSIAN(min..max,mean,stdev) A gaussian/normal distribution, with explicitly defined mean and stdev
+# UNIFORM(min..max) A uniform distribution over the range [min, max]
+# FIXED(val) A fixed distribution, always returning the same value
+# SEQ(min..max) A fixed sequence, returning values in the range min to max sequentially (starting based on seed), wrapping if necessary.
+# Aliases: extr, gauss, normal, norm, weibull
+#
+# If preceded by ~, the distribution is inverted
+#
+# Defaults for all columns are size: uniform(4..8), population: uniform(1..100B), cluster: fixed(1)
+#
+columnspec:
+ - name: key
+ size: fixed(10)
+ population: fixed(1) # the range of unique values to select for the field (default is 100Billion)
+ - name: col1
+ cluster: fixed(1M)
+ - name: val
+ size: fixed(1K)
+
+insert:
+ partitions: fixed(1) # number of unique partitions to update in a single operation
+ # if batchcount > 1, multiple batches will be used but all partitions will
+ # occur in all batches (unless they finish early); only the row counts will vary
+ batchtype: LOGGED # type of batch to use
+ select: fixed(10)/10 # uniform chance any single generated CQL row will be visited in a partition;
+ # generated for each partition independently, each time we visit it
+queries:
+ simple1:
+ cql: select * from typestest where key = ? and col1 = ? LIMIT 100
+ fields: samerow # samerow or multirow (select arguments from the same row, or randomly from all rows in the partition)