tests/common/test_vector.py - impala - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 # A TextMatrix is used to generate a set of ImpalaTestVectors. The vectors that are
 # generated are based on one or more ImpalaTestDimensions inputs. These lists define
 # the set of values that are interesting to a test. For example, for file_format
 # these might be 'seq', 'text', etc
 #
 # The ImpalaTestMatrix is then used to generate a set of ImpalaTestVectors. Each
 # ImpalaTestVector contains a single value from each of the input ImpalaTestDimensions.
 # An example:
 #
 # ImpalaTestMatrix.add_dimension('file_format', 'seq', 'text')
 # ImpalaTestMatrix.add_dimension('agg_func', 'min', 'max', 'sum')
 # ImpalaTestMatrix.add_dimension('col_type', 'int', 'bool')
 # test_vectors = ImpalaTestMatrix.generate_test_vectors(...)
 #
 # Would return a collection of ImpalaTestVectors, with each one containing a
 # combination of file_format, agg_func, and col_type:
 # seq, min, int
 # text, max, bool
 # ...
 #
 # A ImpalaTestVector is an object itself, and the 'get_value' function is used to
 # extract the actual value from the ImpalaTestVector for this particular combination:
 # test_vector = test_vectors[0]
 # print test_vector.get_value('file_format')
 #
 # The combinations of ImpalaTestVectors generated can be done in two ways: pairwise
 # and exhaustive. Pairwise provides a way to get good coverage and reduce the total
 # number of combinations generated where exhaustive will generate all valid
 # combinations.
 #
 # Finally, the ImpalaTestMatrix also provides a way to add constraints to the vectors
 # that are generated. This is useful to filter out invalid combinations. These can
 # be added before calling 'generate_test_vectors'. The constraint is a function that
 # accepts a ImpalaTestVector object and returns true if the vector is valid, false
 # otherwise. For example, if we want to make sure 'bool' columns are not used with 'sum':
 #
 # ImpalaTestMatrix.add_constraint(lambda v:\
 #    not (v.get_value('col_type') == 'bool' and v.get_value('agg_func') == 'sum'))
 #
 # Additional examples of usage can be found within the test suites.

 from __future__ import absolute_import, division, print_function
 from collections import OrderedDict
 from itertools import product
 from copy import deepcopy
 import random
 import logging
 import os


 LOG = logging.getLogger(__name__)
 VECTOR = 'vector'
 # Literal constants to refer to some standard dimension names.
 EXEC_OPTION = 'exec_option'
 PROTOCOL = 'protocol'
 TABLE_FORMAT = 'table_format'
 # Literal constants to refer protocol names.
 BEESWAX = 'beeswax'
 HS2 = 'hs2'
 HS2_HTTP = 'hs2-http'


 def assert_exec_option_key(key):
   assert key.islower(), "Exec option " + key + " is not in lower case"


 # A list of test dimension values.
 class ImpalaTestDimension(list):
   def __init__(self, name, *args):
     self.name = name
     self.extend([ImpalaTestVector.Value(name, arg) for arg in args])


 # A test vector that passed to test method. The ImpalaTestVector can be used to
 # extract values for the specified dimension(s)
 class ImpalaTestVector(object):
   def __init__(self, vector_values):
     self.vector_values = vector_values

   def get_value_with_default(self, name, default_value):
     for vector_value in self.vector_values:
       if vector_value.name == name:
         return vector_value.value
     return default_value

   def get_value(self, name):
     for vector_value in self.vector_values:
       if vector_value.name == name:
         return vector_value.value
     raise ValueError("Test vector does not contain value '%s'" % name)

   def get_protocol(self):
     return self.get_value(PROTOCOL)

   def get_table_format(self):
     return self.get_value(TABLE_FORMAT)

   def get_exec_option_dict(self):
     return self.get_value(EXEC_OPTION)

   def get_exec_option(self, option_name):
     value = self.get_value(EXEC_OPTION).get(option_name.lower())
     assert value is not None
     return value

   def set_exec_option(self, option_name, option_value):
     self.get_value(EXEC_OPTION)[option_name.lower()] = str(option_value)

   def unset_exec_option(self, option_name):
     del self.get_value(EXEC_OPTION)[option_name.lower()]

   def __str__(self):
       return ' | '.join(['%s' % vector_value for vector_value in self.vector_values])

   def __repr__(self):
     return str(self)

   # Each value in a test vector is wrapped in the Value object. This wrapping is
   # done internally so this object should never need to be created by the user.
   class Value(object):
     def __init__(self, name, value):
       self.name = name
       self.value = value

     def __str__(self):
       return '"%s: %s"' % (self.name, self.value)

     def __repr__(self):
       return str(self)


 # Matrix -> Collection of vectors
 # Vector -> Call to get specific values
 class ImpalaTestMatrix(object):
   def __init__(self, *args):
     self.dimensions = OrderedDict((arg.name, arg) for arg in args)
     self.constraint_list = list()
     self.independent_exec_option_names = set()

   def add_dimension(self, dimension):
     self.dimensions[dimension.name] = dimension
     if dimension.name == EXEC_OPTION:
       for name in list(self.independent_exec_option_names):
         LOG.warn("Reassigning {} dimension will remove exec option {}={} that was "
             "independently declared through add_exec_option_dimension.".format(
               EXEC_OPTION, name, [v.value for v in self.dimensions[name]]))
         self.clear_dimension(name)

   def assert_unique_exec_option_key(self, key):
     """Assert that 'exec_option' dimension exist and 'key' is not exist yet
     in self.dimension['exec_option']."""
     assert_exec_option_key(key)
     assert EXEC_OPTION in self.dimensions, (
         "Must have '" + EXEC_OPTION + "' dimension previously declared!")

     for vector in self.dimensions[EXEC_OPTION]:
       assert key not in vector.value, (
           "'{}' is already exist in '{}' dimension!".format(key, EXEC_OPTION))

     # 'key' must not previously declared with add_exec_option_dimension().
     assert key not in self.independent_exec_option_names, (
         "['{}']['{}'] was previously declared with non-unique value: {}".format(
           EXEC_OPTION, key, [dim.value for dim in self.dimensions[key]]))

   def add_mandatory_exec_option(self, key, value):
     """Append key=value pair into 'exec_option' dimension."""
     self.assert_unique_exec_option_key(key)

     for vector in self.dimensions[EXEC_OPTION]:
       vector.value[key] = value

   def add_exec_option_dimension(self, dimension):
     """
     Add 'dimension' into 'self.dimensions' and memorize the name.
     During vector generation, all dimensions registered through this method will be
     embedded into 'exec_option' dimension. This is intended to maintain pairwise and
     exhaustive combination correct while eliminating the need for individual test method
     to append the custom exec options into 'exec_option' dimension themself.
     """
     self.assert_unique_exec_option_key(dimension.name)
     assert len(dimension) > 1, (
         "Dimension " + dimension.name + " must have more than 1 possible value! "
         "Use add_mandatory_exec_option() instead.")

     self.add_dimension(dimension)
     self.independent_exec_option_names.add(dimension.name)

   def clear(self):
     self.dimensions.clear()
     self.independent_exec_option_names = set()

   def clear_dimension(self, dimension_name):
     del self.dimensions[dimension_name]
     self.independent_exec_option_names.discard(dimension_name)

   def has_dimension(self, dimension_name):
     return dimension_name in self.dimensions

   def generate_test_vectors(self, exploration_strategy):
     if not self.dimensions:
       return list()
     # TODO: Check valid exploration strategies, provide more options for exploration
     if exploration_strategy == 'exhaustive':
       return self.__generate_exhaustive_combinations()
     elif exploration_strategy in ['core', 'pairwise']:
       return self.__generate_pairwise_combinations()
     else:
       raise ValueError('Unknown exploration strategy: %s' % exploration_strategy)

   def __deepcopy_vector_values(self, vector_values):
     """Return a deepcopy of vector_values and merge exec options declared through
     add_exec_option_dimension() into 'exec_option' dimension."""
     values = []
     exec_values = []
     exec_option = None
     for val in vector_values:
       if val.name == EXEC_OPTION:
         # 'exec_option' is a map. We need to deepcopy the value for each vector.
         exec_option = deepcopy(val.value)
       elif val.name in self.independent_exec_option_names:
         # save this to merge into exec_option later.
         exec_values.append(val)
       else:
         values.append(val)
     if self.independent_exec_option_names:
       assert exec_option is not None, (
         "Must have '" + EXEC_OPTION + "' dimension previously declared!")
       for val in exec_values:
         exec_option[val.name] = val.value
     if exec_option:
       values.append(ImpalaTestVector.Value(EXEC_OPTION, exec_option))
     return values

   def __generate_exhaustive_combinations(self):
     return [ImpalaTestVector(self.__deepcopy_vector_values(vec))
       for vec in product(*self.__extract_vector_values()) if self.is_valid(vec)]

   def __generate_pairwise_combinations(self):
     # Pairwise fails if the number of inputs == 1. Use exhaustive in this case the
     # results will be the same.
     if len(self.dimensions) == 1:
       return self.__generate_exhaustive_combinations()
     vals = self.__extract_vector_values()
     all_vectors = [v for v in product(*vals) if self.is_valid(v)]
     # Add possibility to shuffle vectors to get different covering vector set.
     # This could excercise 3+ way combinations skipped by the pair wise algorithm.
     seed = os.environ.get('IMPALA_TEST_VECTOR_SEED', None)
     if seed:
       rnd = random.Random(seed)
       rnd.shuffle(all_vectors)
     found = set()
     result = []
     # Originally allpairspy was used for vector set generation to cover all pairs,
     # but it turned out to be unreliable when using constraints (IMPALA-13125). Below
     # is a simple implementation that may use more vectors than needed but will always
     # cover all parameter pairs if possible (constraints can lead to pairs that can't
     # be covered). A caveat is that first the product of all dimensions is needed
     # ('all_vectors'), which grows exponentially with the number of dimensions, but the
     # vector counts in Impala tests are still manageable.
     #
     # Note that there is not much to optimize in the test suites I checked, because due
     # to the constraints and the size differences between dimensions the vector set
     # is dictated by a few dimensions (file_format+exec_options) and this set can easily
     # cover pairs with other dimensions.
     #
     #  TODO: is there a nicer algorithm / is it needed?
     #   pair creation could be modelled as a bipartite graph with dimension pairs A and
     #   test vectors B as vertices, and a minimal set of B would be needed to have
     #   adjacent node for each vertex in A - I am pretty sure that this is polynomial, but
     #   didn't do the research
     # The following steps look for "better vectors" first that cover more dimension pairs
     # to avoid very suboptimal solutions - the example on allpairspy site was covered
     # with 25 vectors instead of the 21 of by allpairspy, which seems acceptable.
     remaining = self.__pairwise_step(all_vectors, result, found, len(self.dimensions))
     remaining = self.__pairwise_step(remaining, result, found, 2)
     remaining = self.__pairwise_step(remaining, result, found, 1)
     assert len(remaining) == 0

     res = [ImpalaTestVector(self.__deepcopy_vector_values(vec))
       for vec in result]
     return res

   def add_constraint(self, constraint_func):
     self.constraint_list.append(constraint_func)

   def clear_constraints(self):
     self.constraint_list = list()

   def __extract_vector_values(self):
     # The data is stored as a tuple of (name, [val1, val2, val3]). So extract the
     # actual values from this
     return [v[1] for v in self.dimensions.items()]

   def is_valid(self, vector):
     assert isinstance(vector, tuple)
     assert len(vector) == len(self.dimensions)
     for constraint in self.constraint_list:
       if not constraint(ImpalaTestVector(vector)):
         return False
     return True

   def __pairwise_step(self, vectors, results, pairs_covered, min_cover_count):
     """ Simple algorithm to select a subset of 'vectors' (each with N items) to cover
         every parameter pair.
         With 'min_cover_count'=1 all pairs will be covered (if possible), for bigger
         values only those vectors will be selected that cover at least 'min_cover_count'
         new pairs and the unused vectors will be returned in the result of the function.
         'pairs_covered' is a set of the already covered pairs. The key is a tuple of 4
         elements (i, v[i], j, v[j]) where i<j and both are < N. Each vector covers
         N * (N - 1) pairs.
     """
     remaining = []
     for v in vectors:
       cnt = 0
       for i, x in enumerate(v):
         for j, y in enumerate(v[i + 1:], i + 1):
           t = (i, x, j, y)
           if t not in pairs_covered:
             cnt += 1
       if cnt == 0: continue
       if cnt < min_cover_count:
         remaining.append(v)
         continue
       results.append(v)
       for i, x in enumerate(v):
         for j, y in enumerate(v[i + 1:], i + 1):
           t = (i, x, j, y)
           pairs_covered.add(t)
     return remaining
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	# A TextMatrix is used to generate a set of ImpalaTestVectors. The vectors that are
	# generated are based on one or more ImpalaTestDimensions inputs. These lists define
	# the set of values that are interesting to a test. For example, for file_format
	# these might be 'seq', 'text', etc
	#
	# The ImpalaTestMatrix is then used to generate a set of ImpalaTestVectors. Each
	# ImpalaTestVector contains a single value from each of the input ImpalaTestDimensions.
	# An example:
	#
	# ImpalaTestMatrix.add_dimension('file_format', 'seq', 'text')
	# ImpalaTestMatrix.add_dimension('agg_func', 'min', 'max', 'sum')
	# ImpalaTestMatrix.add_dimension('col_type', 'int', 'bool')
	# test_vectors = ImpalaTestMatrix.generate_test_vectors(...)
	#
	# Would return a collection of ImpalaTestVectors, with each one containing a
	# combination of file_format, agg_func, and col_type:
	# seq, min, int
	# text, max, bool
	# ...
	#
	# A ImpalaTestVector is an object itself, and the 'get_value' function is used to
	# extract the actual value from the ImpalaTestVector for this particular combination:
	# test_vector = test_vectors[0]
	# print test_vector.get_value('file_format')
	#
	# The combinations of ImpalaTestVectors generated can be done in two ways: pairwise
	# and exhaustive. Pairwise provides a way to get good coverage and reduce the total
	# number of combinations generated where exhaustive will generate all valid
	# combinations.
	#
	# Finally, the ImpalaTestMatrix also provides a way to add constraints to the vectors
	# that are generated. This is useful to filter out invalid combinations. These can
	# be added before calling 'generate_test_vectors'. The constraint is a function that
	# accepts a ImpalaTestVector object and returns true if the vector is valid, false
	# otherwise. For example, if we want to make sure 'bool' columns are not used with 'sum':
	#
	# ImpalaTestMatrix.add_constraint(lambda v:\
	# not (v.get_value('col_type') == 'bool' and v.get_value('agg_func') == 'sum'))
	#
	# Additional examples of usage can be found within the test suites.

	from __future__ import absolute_import, division, print_function
	from collections import OrderedDict
	from itertools import product
	from copy import deepcopy
	import random
	import logging
	import os


	LOG = logging.getLogger(__name__)
	VECTOR = 'vector'
	# Literal constants to refer to some standard dimension names.
	EXEC_OPTION = 'exec_option'
	PROTOCOL = 'protocol'
	TABLE_FORMAT = 'table_format'
	# Literal constants to refer protocol names.
	BEESWAX = 'beeswax'
	HS2 = 'hs2'
	HS2_HTTP = 'hs2-http'


	def assert_exec_option_key(key):
	assert key.islower(), "Exec option " + key + " is not in lower case"


	# A list of test dimension values.
	class ImpalaTestDimension(list):
	def __init__(self, name, *args):
	self.name = name
	self.extend([ImpalaTestVector.Value(name, arg) for arg in args])


	# A test vector that passed to test method. The ImpalaTestVector can be used to
	# extract values for the specified dimension(s)
	class ImpalaTestVector(object):
	def __init__(self, vector_values):
	self.vector_values = vector_values

	def get_value_with_default(self, name, default_value):
	for vector_value in self.vector_values:
	if vector_value.name == name:
	return vector_value.value
	return default_value

	def get_value(self, name):
	for vector_value in self.vector_values:
	if vector_value.name == name:
	return vector_value.value
	raise ValueError("Test vector does not contain value '%s'" % name)

	def get_protocol(self):
	return self.get_value(PROTOCOL)

	def get_table_format(self):
	return self.get_value(TABLE_FORMAT)

	def get_exec_option_dict(self):
	return self.get_value(EXEC_OPTION)

	def get_exec_option(self, option_name):
	value = self.get_value(EXEC_OPTION).get(option_name.lower())
	assert value is not None
	return value

	def set_exec_option(self, option_name, option_value):
	self.get_value(EXEC_OPTION)[option_name.lower()] = str(option_value)

	def unset_exec_option(self, option_name):
	del self.get_value(EXEC_OPTION)[option_name.lower()]

	def __str__(self):
	return ' \| '.join(['%s' % vector_value for vector_value in self.vector_values])

	def __repr__(self):
	return str(self)

	# Each value in a test vector is wrapped in the Value object. This wrapping is
	# done internally so this object should never need to be created by the user.
	class Value(object):
	def __init__(self, name, value):
	self.name = name
	self.value = value

	def __str__(self):
	return '"%s: %s"' % (self.name, self.value)

	def __repr__(self):
	return str(self)


	# Matrix -> Collection of vectors
	# Vector -> Call to get specific values
	class ImpalaTestMatrix(object):
	def __init__(self, *args):
	self.dimensions = OrderedDict((arg.name, arg) for arg in args)
	self.constraint_list = list()
	self.independent_exec_option_names = set()

	def add_dimension(self, dimension):
	self.dimensions[dimension.name] = dimension
	if dimension.name == EXEC_OPTION:
	for name in list(self.independent_exec_option_names):
	LOG.warn("Reassigning {} dimension will remove exec option {}={} that was "
	"independently declared through add_exec_option_dimension.".format(
	EXEC_OPTION, name, [v.value for v in self.dimensions[name]]))
	self.clear_dimension(name)

	def assert_unique_exec_option_key(self, key):
	"""Assert that 'exec_option' dimension exist and 'key' is not exist yet
	in self.dimension['exec_option']."""
	assert_exec_option_key(key)
	assert EXEC_OPTION in self.dimensions, (
	"Must have '" + EXEC_OPTION + "' dimension previously declared!")

	for vector in self.dimensions[EXEC_OPTION]:
	assert key not in vector.value, (
	"'{}' is already exist in '{}' dimension!".format(key, EXEC_OPTION))

	# 'key' must not previously declared with add_exec_option_dimension().
	assert key not in self.independent_exec_option_names, (
	"['{}']['{}'] was previously declared with non-unique value: {}".format(
	EXEC_OPTION, key, [dim.value for dim in self.dimensions[key]]))

	def add_mandatory_exec_option(self, key, value):
	"""Append key=value pair into 'exec_option' dimension."""
	self.assert_unique_exec_option_key(key)

	for vector in self.dimensions[EXEC_OPTION]:
	vector.value[key] = value

	def add_exec_option_dimension(self, dimension):
	"""
	Add 'dimension' into 'self.dimensions' and memorize the name.
	During vector generation, all dimensions registered through this method will be
	embedded into 'exec_option' dimension. This is intended to maintain pairwise and
	exhaustive combination correct while eliminating the need for individual test method
	to append the custom exec options into 'exec_option' dimension themself.
	"""
	self.assert_unique_exec_option_key(dimension.name)
	assert len(dimension) > 1, (
	"Dimension " + dimension.name + " must have more than 1 possible value! "
	"Use add_mandatory_exec_option() instead.")

	self.add_dimension(dimension)
	self.independent_exec_option_names.add(dimension.name)

	def clear(self):
	self.dimensions.clear()
	self.independent_exec_option_names = set()

	def clear_dimension(self, dimension_name):
	del self.dimensions[dimension_name]
	self.independent_exec_option_names.discard(dimension_name)

	def has_dimension(self, dimension_name):
	return dimension_name in self.dimensions

	def generate_test_vectors(self, exploration_strategy):
	if not self.dimensions:
	return list()
	# TODO: Check valid exploration strategies, provide more options for exploration
	if exploration_strategy == 'exhaustive':
	return self.__generate_exhaustive_combinations()
	elif exploration_strategy in ['core', 'pairwise']:
	return self.__generate_pairwise_combinations()
	else:
	raise ValueError('Unknown exploration strategy: %s' % exploration_strategy)

	def __deepcopy_vector_values(self, vector_values):
	"""Return a deepcopy of vector_values and merge exec options declared through
	add_exec_option_dimension() into 'exec_option' dimension."""
	values = []
	exec_values = []
	exec_option = None
	for val in vector_values:
	if val.name == EXEC_OPTION:
	# 'exec_option' is a map. We need to deepcopy the value for each vector.
	exec_option = deepcopy(val.value)
	elif val.name in self.independent_exec_option_names:
	# save this to merge into exec_option later.
	exec_values.append(val)
	else:
	values.append(val)
	if self.independent_exec_option_names:
	assert exec_option is not None, (
	"Must have '" + EXEC_OPTION + "' dimension previously declared!")
	for val in exec_values:
	exec_option[val.name] = val.value
	if exec_option:
	values.append(ImpalaTestVector.Value(EXEC_OPTION, exec_option))
	return values

	def __generate_exhaustive_combinations(self):
	return [ImpalaTestVector(self.__deepcopy_vector_values(vec))
	for vec in product(*self.__extract_vector_values()) if self.is_valid(vec)]

	def __generate_pairwise_combinations(self):
	# Pairwise fails if the number of inputs == 1. Use exhaustive in this case the
	# results will be the same.
	if len(self.dimensions) == 1:
	return self.__generate_exhaustive_combinations()
	vals = self.__extract_vector_values()
	all_vectors = [v for v in product(*vals) if self.is_valid(v)]
	# Add possibility to shuffle vectors to get different covering vector set.
	# This could excercise 3+ way combinations skipped by the pair wise algorithm.
	seed = os.environ.get('IMPALA_TEST_VECTOR_SEED', None)
	if seed:
	rnd = random.Random(seed)
	rnd.shuffle(all_vectors)
	found = set()
	result = []
	# Originally allpairspy was used for vector set generation to cover all pairs,
	# but it turned out to be unreliable when using constraints (IMPALA-13125). Below
	# is a simple implementation that may use more vectors than needed but will always
	# cover all parameter pairs if possible (constraints can lead to pairs that can't
	# be covered). A caveat is that first the product of all dimensions is needed
	# ('all_vectors'), which grows exponentially with the number of dimensions, but the
	# vector counts in Impala tests are still manageable.
	#
	# Note that there is not much to optimize in the test suites I checked, because due
	# to the constraints and the size differences between dimensions the vector set
	# is dictated by a few dimensions (file_format+exec_options) and this set can easily
	# cover pairs with other dimensions.
	#
	# TODO: is there a nicer algorithm / is it needed?
	# pair creation could be modelled as a bipartite graph with dimension pairs A and
	# test vectors B as vertices, and a minimal set of B would be needed to have
	# adjacent node for each vertex in A - I am pretty sure that this is polynomial, but
	# didn't do the research
	# The following steps look for "better vectors" first that cover more dimension pairs
	# to avoid very suboptimal solutions - the example on allpairspy site was covered
	# with 25 vectors instead of the 21 of by allpairspy, which seems acceptable.
	remaining = self.__pairwise_step(all_vectors, result, found, len(self.dimensions))
	remaining = self.__pairwise_step(remaining, result, found, 2)
	remaining = self.__pairwise_step(remaining, result, found, 1)
	assert len(remaining) == 0

	res = [ImpalaTestVector(self.__deepcopy_vector_values(vec))
	for vec in result]
	return res

	def add_constraint(self, constraint_func):
	self.constraint_list.append(constraint_func)

	def clear_constraints(self):
	self.constraint_list = list()

	def __extract_vector_values(self):
	# The data is stored as a tuple of (name, [val1, val2, val3]). So extract the
	# actual values from this
	return [v[1] for v in self.dimensions.items()]

	def is_valid(self, vector):
	assert isinstance(vector, tuple)
	assert len(vector) == len(self.dimensions)
	for constraint in self.constraint_list:
	if not constraint(ImpalaTestVector(vector)):
	return False
	return True

	def __pairwise_step(self, vectors, results, pairs_covered, min_cover_count):
	""" Simple algorithm to select a subset of 'vectors' (each with N items) to cover
	every parameter pair.
	With 'min_cover_count'=1 all pairs will be covered (if possible), for bigger
	values only those vectors will be selected that cover at least 'min_cover_count'
	new pairs and the unused vectors will be returned in the result of the function.
	'pairs_covered' is a set of the already covered pairs. The key is a tuple of 4
	elements (i, v[i], j, v[j]) where i<j and both are < N. Each vector covers
	N * (N - 1) pairs.
	"""
	remaining = []
	for v in vectors:
	cnt = 0
	for i, x in enumerate(v):
	for j, y in enumerate(v[i + 1:], i + 1):
	t = (i, x, j, y)
	if t not in pairs_covered:
	cnt += 1
	if cnt == 0: continue
	if cnt < min_cover_count:
	remaining.append(v)
	continue
	results.append(v)
	for i, x in enumerate(v):
	for j, y in enumerate(v[i + 1:], i + 1):
	t = (i, x, j, y)
	pairs_covered.add(t)
	return remaining