Add dtest for repairing wide rows
patch by jasobrown, reviewed by Paulo Motta for CASSANDRA-13899
diff --git a/repair_tests/repair_test.py b/repair_tests/repair_test.py
index ad46d18..1d5bf8f 100644
--- a/repair_tests/repair_test.py
+++ b/repair_tests/repair_test.py
@@ -1,3 +1,5 @@
+import os
+import os.path
import threading
import time
import re
@@ -1061,6 +1063,24 @@
_, _, rc = node2.stress(['read', 'n=1M', 'no-warmup', '-rate', 'threads=30'], whitelist=True)
self.assertEqual(rc, 0)
+ @since('4.0')
+ def test_wide_row_repair(self):
+ """
+ @jira_ticket CASSANDRA-13899
+ Make sure compressed vs. uncompressed blocks are handled correctly during stream decompression
+ """
+ cluster = self.cluster
+ cluster.set_configuration_options(values={'hinted_handoff_enabled': False})
+ cluster.populate(2).start(wait_for_binary_proto=True)
+ node1, node2 = cluster.nodelist()
+ node2.stop(wait_other_notice=True)
+ profile_path = os.path.join(os.getcwd(), 'stress_profiles/repair_wide_rows.yaml')
+ print("yaml = " + profile_path)
+ node1.stress(['user', 'profile=' + profile_path, 'n=50', 'ops(insert=1)', 'no-warmup', '-rate', 'threads=8',
+ '-insert', 'visits=FIXED(100K)', 'revisit=FIXED(100K)'])
+ node2.start(wait_for_binary_proto=True)
+ node2.repair()
+
def test_dead_coordinator(self):
"""
@jira_ticket CASSANDRA-11824
diff --git a/stress_profiles/repair_wide_rows.yaml b/stress_profiles/repair_wide_rows.yaml
new file mode 100644
index 0000000..87f46f0
--- /dev/null
+++ b/stress_profiles/repair_wide_rows.yaml
@@ -0,0 +1,54 @@
+keyspace: stresscql
+keyspace_definition: |
+ CREATE KEYSPACE stresscql WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 2};
+
+table: typestest
+table_definition: |
+ CREATE TABLE typestest (
+ key text,
+ col1 text,
+ val blob,
+ PRIMARY KEY(key, col1)
+ )
+ WITH compaction = { 'class':'LeveledCompactionStrategy' }
+ AND compression = {'chunk_length_in_kb': '1', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'};
+
+#
+# Optional meta information on the generated columns in the above table
+# The min and max only apply to text and blob types
+# The distribution field represents the total unique population
+# distribution of that column across rows. Supported types are
+#
+# EXP(min..max) An exponential distribution over the range [min..max]
+# EXTREME(min..max,shape) An extreme value (Weibull) distribution over the range [min..max]
+# GAUSSIAN(min..max,stdvrng) A gaussian/normal distribution, where mean=(min+max)/2, and stdev is (mean-min)/stdvrng
+# GAUSSIAN(min..max,mean,stdev) A gaussian/normal distribution, with explicitly defined mean and stdev
+# UNIFORM(min..max) A uniform distribution over the range [min, max]
+# FIXED(val) A fixed distribution, always returning the same value
+# SEQ(min..max) A fixed sequence, returning values in the range min to max sequentially (starting based on seed), wrapping if necessary.
+# Aliases: extr, gauss, normal, norm, weibull
+#
+# If preceded by ~, the distribution is inverted
+#
+# Defaults for all columns are size: uniform(4..8), population: uniform(1..100B), cluster: fixed(1)
+#
+columnspec:
+ - name: key
+ size: fixed(10)
+ population: fixed(1) # the range of unique values to select for the field (default is 100Billion)
+ - name: col1
+ cluster: fixed(1M)
+ - name: val
+ size: fixed(1K)
+
+insert:
+ partitions: fixed(1) # number of unique partitions to update in a single operation
+ # if batchcount > 1, multiple batches will be used but all partitions will
+ # occur in all batches (unless they finish early); only the row counts will vary
+ batchtype: LOGGED # type of batch to use
+ select: fixed(10)/10 # uniform chance any single generated CQL row will be visited in a partition;
+ # generated for each partition independently, each time we visit it
+queries:
+ simple1:
+ cql: select * from typestest where key = ? and col1 = ? LIMIT 100
+ fields: samerow # samerow or multirow (select arguments from the same row, or randomly from all rows in the partition)