schema_test.py - cassandra-dtest - Git at Google

 import time
 import pytest
 import logging

 from cassandra.concurrent import execute_concurrent_with_args

 from tools.assertions import assert_invalid, assert_all, assert_one, assert_none, assert_some
 from dtest import Tester, create_ks, create_cf

 since = pytest.mark.since
 logger = logging.getLogger(__name__)


 class TestSchema(Tester):

     def test_table_alteration(self):
         """
         Tests that table alters return as expected with many sstables at different schema points
         """
         cluster = self.cluster
         cluster.populate(1).start()
         node1, = cluster.nodelist()
         session = self.patient_cql_connection(node1)
         create_ks(session, 'ks', 1)
         session.execute("use ks;")
         session.execute("create table tbl_o_churn (id int primary key, c0 text, c1 text) "
                         "WITH compaction = {'class': 'SizeTieredCompactionStrategy', 'min_threshold': 1024, 'max_threshold': 1024 };")

         stmt1 = session.prepare("insert into tbl_o_churn (id, c0, c1) values (?, ?, ?)")
         rows_to_insert = 50

         for n in range(5):
             parameters = [(x, 'aaa', 'bbb') for x in range(n * rows_to_insert, (n * rows_to_insert) + rows_to_insert)]
             execute_concurrent_with_args(session, stmt1, parameters, concurrency=rows_to_insert)
             node1.flush()

         session.execute("alter table tbl_o_churn add c2 text")
         session.execute("alter table tbl_o_churn drop c0")
         stmt2 = session.prepare("insert into tbl_o_churn (id, c1, c2) values (?, ?, ?);")

         for n in range(5, 10):
             parameters = [(x, 'ccc', 'ddd') for x in range(n * rows_to_insert, (n * rows_to_insert) + rows_to_insert)]
             execute_concurrent_with_args(session, stmt2, parameters, concurrency=rows_to_insert)
             node1.flush()

         rows = session.execute("select * from tbl_o_churn")
         for row in rows:
             if row.id < rows_to_insert * 5:
                 assert row.c1 == 'bbb'
                 assert row.c2 is None
                 assert not hasattr(row, 'c0')
             else:
                 assert row.c1 == 'ccc'
                 assert row.c2 == 'ddd'
                 assert not hasattr(row, 'c0')

     @since("2.0", max_version="3.X")  # Compact Storage
     def test_drop_column_compact(self):
         session = self.prepare()

         session.execute("USE ks")
         session.execute("CREATE TABLE cf (key int PRIMARY KEY, c1 int, c2 int) WITH COMPACT STORAGE")

         assert_invalid(session, "ALTER TABLE cf DROP c1", "Cannot drop columns from a")

     def test_drop_column_compaction(self):
         session = self.prepare()
         session.execute("USE ks")
         session.execute("CREATE TABLE cf (key int PRIMARY KEY, c1 int, c2 int)")

         # insert some data.
         session.execute("INSERT INTO cf (key, c1, c2) VALUES (0, 1, 2)")
         session.execute("INSERT INTO cf (key, c1, c2) VALUES (1, 2, 3)")
         session.execute("INSERT INTO cf (key, c1, c2) VALUES (2, 3, 4)")

         # drop and readd c1.
         session.execute("ALTER TABLE cf DROP c1")
         session.execute("ALTER TABLE cf ADD c1 int")

         # add another row.
         session.execute("INSERT INTO cf (key, c1, c2) VALUES (3, 4, 5)")

         node = self.cluster.nodelist()[0]
         node.flush()
         node.compact()

         # test that c1 values have been compacted away.
         session = self.patient_cql_connection(node)
         assert_all(session, "SELECT c1 FROM ks.cf", [[None], [None], [None], [4]], ignore_order=True)

     def test_drop_column_queries(self):
         session = self.prepare()

         session.execute("USE ks")
         session.execute("CREATE TABLE cf (key int PRIMARY KEY, c1 int, c2 int)")
         session.execute("CREATE INDEX ON cf(c2)")

         # insert some data.
         session.execute("INSERT INTO cf (key, c1, c2) VALUES (0, 1, 2)")
         session.execute("INSERT INTO cf (key, c1, c2) VALUES (1, 2, 3)")
         session.execute("INSERT INTO cf (key, c1, c2) VALUES (2, 3, 4)")

         # drop and readd c1.
         session.execute("ALTER TABLE cf DROP c1")
         session.execute("ALTER TABLE cf ADD c1 int")

         # add another row.
         session.execute("INSERT INTO cf (key, c1, c2) VALUES (3, 4, 5)")

         # test that old (pre-drop) c1 values aren't returned and new ones are.
         assert_all(session, "SELECT c1 FROM cf", [[None], [None], [None], [4]], ignore_order=True)

         assert_all(session, "SELECT * FROM cf", [[0, None, 2], [1, None, 3], [2, None, 4], [3, 4, 5]], ignore_order=True)

         assert_one(session, "SELECT c1 FROM cf WHERE key = 0", [None])

         assert_one(session, "SELECT c1 FROM cf WHERE key = 3", [4])

         assert_one(session, "SELECT * FROM cf WHERE c2 = 2", [0, None, 2])

         assert_one(session, "SELECT * FROM cf WHERE c2 = 5", [3, 4, 5])

     def test_drop_column_and_restart(self):
         """
         Simply insert data in a table, drop a column involved in the insert and restart the node afterwards.
         This ensures that the dropped_columns system table is properly flushed on the alter or the restart
         fails as in CASSANDRA-11050.

         @jira_ticket CASSANDRA-11050
         """
         session = self.prepare()

         session.execute("USE ks")
         session.execute("CREATE TABLE t (k int PRIMARY KEY, c1 int, c2 int)")

         session.execute("INSERT INTO t (k, c1, c2) VALUES (0, 0, 0)")
         session.execute("ALTER TABLE t DROP c2")

         assert_one(session, "SELECT * FROM t", [0, 0])

         self.cluster.stop()
         self.cluster.start()

         session = self.patient_cql_connection(self.cluster.nodelist()[0])

         session.execute("USE ks")
         assert_one(session, "SELECT * FROM t", [0, 0])

     def test_drop_static_column_and_restart(self):
         """
         Dropping a static column caused an sstable corrupt exception after restarting, here
         we test that we can drop a static column and restart safely.

         @jira_ticket CASSANDRA-12582
         """
         session = self.prepare()

         session.execute("USE ks")
         session.execute("CREATE TABLE ts (id1 int, id2 int, id3 int static, val text, PRIMARY KEY (id1, id2))")

         session.execute("INSERT INTO ts (id1, id2, id3, val) VALUES (1, 1, 0, 'v1')")
         session.execute("INSERT INTO ts (id1, id2, id3, val) VALUES (1, 2, 0, 'v2')")
         session.execute("INSERT INTO ts (id1, id2, id3, val) VALUES (2, 1, 1, 'v3')")

         self.cluster.nodelist()[0].nodetool('flush ks ts')
         assert_all(session, "SELECT * FROM ts", [[1, 1, 0, 'v1'], [1, 2, 0, 'v2'], [2, 1, 1, 'v3']])

         session.execute("alter table ts drop id3")
         assert_all(session, "SELECT * FROM ts", [[1, 1, 'v1'], [1, 2, 'v2'], [2, 1, 'v3']])

         self.cluster.stop()
         self.cluster.start()

         session = self.patient_cql_connection(self.cluster.nodelist()[0])

         session.execute("USE ks")
         assert_all(session, "SELECT * FROM ts", [[1, 1, 'v1'], [1, 2, 'v2'], [2, 1, 'v3']])

     @since('3.0')
     def drop_table_reflected_in_size_estimates_test(self):
         """
         A dropped table should result in its entries being removed from size estimates, on both
         nodes that are up and down at the time of the drop.

         @jira_ticket CASSANDRA-14905
         """
         cluster = self.cluster
         cluster.populate(2).start()
         node1, node2 = cluster.nodelist()
         session = self.patient_exclusive_cql_connection(node1)
         create_ks(session, 'ks1', 2)
         create_ks(session, 'ks2', 2)
         create_cf(session, 'ks1.cf1', columns={'c1': 'text', 'c2': 'text'})
         create_cf(session, 'ks2.cf1', columns={'c1': 'text', 'c2': 'text'})
         create_cf(session, 'ks2.cf2', columns={'c1': 'text', 'c2': 'text'})

         node1.nodetool('refreshsizeestimates')
         node2.nodetool('refreshsizeestimates')
         node2.stop()
         session.execute('DROP TABLE ks2.cf1')
         session.execute('DROP KEYSPACE ks1')
         node2.start(wait_for_binary_proto=True)
         session2 = self.patient_exclusive_cql_connection(node2)

         session.cluster.control_connection.wait_for_schema_agreement()

         assert_none(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks1'")
         assert_none(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks2' AND table_name='cf1'")
         assert_some(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks2' AND table_name='cf2'")
         assert_none(session2, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks1'")
         assert_none(session2, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks2' AND table_name='cf1'")
         assert_some(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks2' AND table_name='cf2'")

     @since('3.0')
     def invalid_entries_removed_from_size_estimates_on_restart_test(self):
         """
         Entries for dropped tables/keyspaces should be cleared from size_estimates on restart.

         @jira_ticket CASSANDRA-14905
         """
         cluster = self.cluster
         cluster.populate(1).start()
         node = cluster.nodelist()[0]
         session = self.patient_cql_connection(node)
         session.execute("USE system;")
         session.execute("INSERT INTO size_estimates (keyspace_name, table_name, range_start, range_end, mean_partition_size, partitions_count) VALUES ( 'system_auth', 'bad_table', '-5', '5', 0, 0);")
         # Invalid keyspace and table
         session.execute("INSERT INTO size_estimates (keyspace_name, table_name, range_start, range_end, mean_partition_size, partitions_count) VALUES ( 'bad_keyspace', 'bad_table', '-5', '5', 0, 0);")
         node.stop()
         node.start()
         session = self.patient_cql_connection(node)
         assert_none(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='system_auth' AND table_name='bad_table'")
         assert_none(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='bad_keyspace'")


     def prepare(self):
         cluster = self.cluster
         cluster.populate(1).start()
         time.sleep(.5)
         nodes = cluster.nodelist()
         session = self.patient_cql_connection(nodes[0])
         create_ks(session, 'ks', 1)
         return session
	import time
	import pytest
	import logging

	from cassandra.concurrent import execute_concurrent_with_args

	from tools.assertions import assert_invalid, assert_all, assert_one, assert_none, assert_some
	from dtest import Tester, create_ks, create_cf

	since = pytest.mark.since
	logger = logging.getLogger(__name__)


	class TestSchema(Tester):

	def test_table_alteration(self):
	"""
	Tests that table alters return as expected with many sstables at different schema points
	"""
	cluster = self.cluster
	cluster.populate(1).start()
	node1, = cluster.nodelist()
	session = self.patient_cql_connection(node1)
	create_ks(session, 'ks', 1)
	session.execute("use ks;")
	session.execute("create table tbl_o_churn (id int primary key, c0 text, c1 text) "
	"WITH compaction = {'class': 'SizeTieredCompactionStrategy', 'min_threshold': 1024, 'max_threshold': 1024 };")

	stmt1 = session.prepare("insert into tbl_o_churn (id, c0, c1) values (?, ?, ?)")
	rows_to_insert = 50

	for n in range(5):
	parameters = [(x, 'aaa', 'bbb') for x in range(n * rows_to_insert, (n * rows_to_insert) + rows_to_insert)]
	execute_concurrent_with_args(session, stmt1, parameters, concurrency=rows_to_insert)
	node1.flush()

	session.execute("alter table tbl_o_churn add c2 text")
	session.execute("alter table tbl_o_churn drop c0")
	stmt2 = session.prepare("insert into tbl_o_churn (id, c1, c2) values (?, ?, ?);")

	for n in range(5, 10):
	parameters = [(x, 'ccc', 'ddd') for x in range(n * rows_to_insert, (n * rows_to_insert) + rows_to_insert)]
	execute_concurrent_with_args(session, stmt2, parameters, concurrency=rows_to_insert)
	node1.flush()

	rows = session.execute("select * from tbl_o_churn")
	for row in rows:
	if row.id < rows_to_insert * 5:
	assert row.c1 == 'bbb'
	assert row.c2 is None
	assert not hasattr(row, 'c0')
	else:
	assert row.c1 == 'ccc'
	assert row.c2 == 'ddd'
	assert not hasattr(row, 'c0')

	@since("2.0", max_version="3.X") # Compact Storage
	def test_drop_column_compact(self):
	session = self.prepare()

	session.execute("USE ks")
	session.execute("CREATE TABLE cf (key int PRIMARY KEY, c1 int, c2 int) WITH COMPACT STORAGE")

	assert_invalid(session, "ALTER TABLE cf DROP c1", "Cannot drop columns from a")

	def test_drop_column_compaction(self):
	session = self.prepare()
	session.execute("USE ks")
	session.execute("CREATE TABLE cf (key int PRIMARY KEY, c1 int, c2 int)")

	# insert some data.
	session.execute("INSERT INTO cf (key, c1, c2) VALUES (0, 1, 2)")
	session.execute("INSERT INTO cf (key, c1, c2) VALUES (1, 2, 3)")
	session.execute("INSERT INTO cf (key, c1, c2) VALUES (2, 3, 4)")

	# drop and readd c1.
	session.execute("ALTER TABLE cf DROP c1")
	session.execute("ALTER TABLE cf ADD c1 int")

	# add another row.
	session.execute("INSERT INTO cf (key, c1, c2) VALUES (3, 4, 5)")

	node = self.cluster.nodelist()[0]
	node.flush()
	node.compact()

	# test that c1 values have been compacted away.
	session = self.patient_cql_connection(node)
	assert_all(session, "SELECT c1 FROM ks.cf", [[None], [None], [None], [4]], ignore_order=True)

	def test_drop_column_queries(self):
	session = self.prepare()

	session.execute("USE ks")
	session.execute("CREATE TABLE cf (key int PRIMARY KEY, c1 int, c2 int)")
	session.execute("CREATE INDEX ON cf(c2)")

	# insert some data.
	session.execute("INSERT INTO cf (key, c1, c2) VALUES (0, 1, 2)")
	session.execute("INSERT INTO cf (key, c1, c2) VALUES (1, 2, 3)")
	session.execute("INSERT INTO cf (key, c1, c2) VALUES (2, 3, 4)")

	# drop and readd c1.
	session.execute("ALTER TABLE cf DROP c1")
	session.execute("ALTER TABLE cf ADD c1 int")

	# add another row.
	session.execute("INSERT INTO cf (key, c1, c2) VALUES (3, 4, 5)")

	# test that old (pre-drop) c1 values aren't returned and new ones are.
	assert_all(session, "SELECT c1 FROM cf", [[None], [None], [None], [4]], ignore_order=True)

	assert_all(session, "SELECT * FROM cf", [[0, None, 2], [1, None, 3], [2, None, 4], [3, 4, 5]], ignore_order=True)

	assert_one(session, "SELECT c1 FROM cf WHERE key = 0", [None])

	assert_one(session, "SELECT c1 FROM cf WHERE key = 3", [4])

	assert_one(session, "SELECT * FROM cf WHERE c2 = 2", [0, None, 2])

	assert_one(session, "SELECT * FROM cf WHERE c2 = 5", [3, 4, 5])

	def test_drop_column_and_restart(self):
	"""
	Simply insert data in a table, drop a column involved in the insert and restart the node afterwards.
	This ensures that the dropped_columns system table is properly flushed on the alter or the restart
	fails as in CASSANDRA-11050.

	@jira_ticket CASSANDRA-11050
	"""
	session = self.prepare()

	session.execute("USE ks")
	session.execute("CREATE TABLE t (k int PRIMARY KEY, c1 int, c2 int)")

	session.execute("INSERT INTO t (k, c1, c2) VALUES (0, 0, 0)")
	session.execute("ALTER TABLE t DROP c2")

	assert_one(session, "SELECT * FROM t", [0, 0])

	self.cluster.stop()
	self.cluster.start()

	session = self.patient_cql_connection(self.cluster.nodelist()[0])

	session.execute("USE ks")
	assert_one(session, "SELECT * FROM t", [0, 0])

	def test_drop_static_column_and_restart(self):
	"""
	Dropping a static column caused an sstable corrupt exception after restarting, here
	we test that we can drop a static column and restart safely.

	@jira_ticket CASSANDRA-12582
	"""
	session = self.prepare()

	session.execute("USE ks")
	session.execute("CREATE TABLE ts (id1 int, id2 int, id3 int static, val text, PRIMARY KEY (id1, id2))")

	session.execute("INSERT INTO ts (id1, id2, id3, val) VALUES (1, 1, 0, 'v1')")
	session.execute("INSERT INTO ts (id1, id2, id3, val) VALUES (1, 2, 0, 'v2')")
	session.execute("INSERT INTO ts (id1, id2, id3, val) VALUES (2, 1, 1, 'v3')")

	self.cluster.nodelist()[0].nodetool('flush ks ts')
	assert_all(session, "SELECT * FROM ts", [[1, 1, 0, 'v1'], [1, 2, 0, 'v2'], [2, 1, 1, 'v3']])

	session.execute("alter table ts drop id3")
	assert_all(session, "SELECT * FROM ts", [[1, 1, 'v1'], [1, 2, 'v2'], [2, 1, 'v3']])

	self.cluster.stop()
	self.cluster.start()

	session = self.patient_cql_connection(self.cluster.nodelist()[0])

	session.execute("USE ks")
	assert_all(session, "SELECT * FROM ts", [[1, 1, 'v1'], [1, 2, 'v2'], [2, 1, 'v3']])

	@since('3.0')
	def drop_table_reflected_in_size_estimates_test(self):
	"""
	A dropped table should result in its entries being removed from size estimates, on both
	nodes that are up and down at the time of the drop.

	@jira_ticket CASSANDRA-14905
	"""
	cluster = self.cluster
	cluster.populate(2).start()
	node1, node2 = cluster.nodelist()
	session = self.patient_exclusive_cql_connection(node1)
	create_ks(session, 'ks1', 2)
	create_ks(session, 'ks2', 2)
	create_cf(session, 'ks1.cf1', columns={'c1': 'text', 'c2': 'text'})
	create_cf(session, 'ks2.cf1', columns={'c1': 'text', 'c2': 'text'})
	create_cf(session, 'ks2.cf2', columns={'c1': 'text', 'c2': 'text'})

	node1.nodetool('refreshsizeestimates')
	node2.nodetool('refreshsizeestimates')
	node2.stop()
	session.execute('DROP TABLE ks2.cf1')
	session.execute('DROP KEYSPACE ks1')
	node2.start(wait_for_binary_proto=True)
	session2 = self.patient_exclusive_cql_connection(node2)

	session.cluster.control_connection.wait_for_schema_agreement()

	assert_none(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks1'")
	assert_none(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks2' AND table_name='cf1'")
	assert_some(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks2' AND table_name='cf2'")
	assert_none(session2, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks1'")
	assert_none(session2, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks2' AND table_name='cf1'")
	assert_some(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks2' AND table_name='cf2'")

	@since('3.0')
	def invalid_entries_removed_from_size_estimates_on_restart_test(self):
	"""
	Entries for dropped tables/keyspaces should be cleared from size_estimates on restart.

	@jira_ticket CASSANDRA-14905
	"""
	cluster = self.cluster
	cluster.populate(1).start()
	node = cluster.nodelist()[0]
	session = self.patient_cql_connection(node)
	session.execute("USE system;")
	session.execute("INSERT INTO size_estimates (keyspace_name, table_name, range_start, range_end, mean_partition_size, partitions_count) VALUES ( 'system_auth', 'bad_table', '-5', '5', 0, 0);")
	# Invalid keyspace and table
	session.execute("INSERT INTO size_estimates (keyspace_name, table_name, range_start, range_end, mean_partition_size, partitions_count) VALUES ( 'bad_keyspace', 'bad_table', '-5', '5', 0, 0);")
	node.stop()
	node.start()
	session = self.patient_cql_connection(node)
	assert_none(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='system_auth' AND table_name='bad_table'")
	assert_none(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='bad_keyspace'")


	def prepare(self):
	cluster = self.cluster
	cluster.populate(1).start()
	time.sleep(.5)
	nodes = cluster.nodelist()
	session = self.patient_cql_connection(nodes[0])
	create_ks(session, 'ks', 1)
	return session