blob: b0fb5725310ffd819d69cd9946236b2808c97baa [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Generates random decimal numbers and verifies that mathematical
# operations return correct results under decimal_v2.
import decimal
import math
import pytest
import random
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.test_dimensions import create_single_exec_option_dimension
from tests.common.test_vector import ImpalaTestDimension, ImpalaTestMatrix
class TestDecimalFuzz(ImpalaTestSuite):
# Impala's max precision for decimals is 38, so we should have the same in the tests
decimal.getcontext().prec = 38
@classmethod
def get_workload(cls):
return 'functional-query'
@classmethod
def add_test_dimensions(cls):
cls.ImpalaTestMatrix = ImpalaTestMatrix()
cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
cls.iterations = 10000
def weighted_choice(self, options):
total_weight = sum(options.itervalues())
numeric_choice = random.uniform(0, total_weight)
last_choice = None
for choice, weight in options.iteritems():
if numeric_choice <= weight:
return choice
numeric_choice -= weight
if weight > 0:
last_choice = choice
return last_choice
def get_decimal(self):
'''Returns a 3-tuple with string values of (value, precision, scale). The function
does not always return completely random values, we try to bias it to select
more interesting values.'''
def random_precision():
return random.randint(1, 38)
def extreme_precision():
return 38
precision_weights = {}
precision_weights[random_precision] = 0.8
precision_weights[extreme_precision] = 0.2
precision = self.weighted_choice(precision_weights)()
def random_scale(precision):
return random.randint(0, precision)
def extreme_scale(precision):
return random.choice([0, precision])
scale_weights = {}
scale_weights[random_scale] = 0.9
scale_weights[extreme_scale] = 0.1
scale = self.weighted_choice(scale_weights)(precision)
def random_value(precision):
'''Generates a completely random value.'''
def num_digits_random(precision):
return random.randint(1, precision)
def num_digits_all(precision):
return precision
# Determine how many digits the value is going to have.
num_digits_weights = {}
num_digits_weights[num_digits_random] = 0.8
num_digits_weights[num_digits_all] = 0.2
num_digits = self.weighted_choice(num_digits_weights)(precision)
no_zero = '123456789'
with_zero = '0123456789'
result = random.choice(no_zero)
for _ in range(num_digits - 1):
result += random.choice(with_zero)
return result
def special_case_binary_value(precision):
'''Generates a value that looks like 11111... or 10000... in binary number
system.'''
def exponent_random(precision):
return random.randint(0, int(precision * math.log(10, 2)))
def exponent_max(precision):
return int(precision * math.log(10, 2))
exponent_weights = {}
exponent_weights[exponent_random] = 0.8
exponent_weights[exponent_max] = 0.2
exponent = self.weighted_choice(exponent_weights)(precision)
value = 2 ** exponent
if random.random() < 0.5:
value -= 1
return '{0}'.format(value)
def special_case_decimal_value(precision):
'''Generates a value that looks like 99999... or 10000... in decimal number
system.'''
def num_digits_random(precision):
return random.randint(1, precision)
def num_digits_max(precision):
return precision
num_digits_weights = {}
num_digits_weights[num_digits_random] = 8
num_digits_weights[num_digits_max] = 0.2
num_digits = self.weighted_choice(num_digits_weights)(precision)
value = 10 ** num_digits
if num_digits == precision or random.random() < 0.5:
value -= 1
return '{0}'.format(value)
value_weights = {}
value_weights[random_value] = 0.6
value_weights[special_case_binary_value] = 0.2
value_weights[special_case_decimal_value] = 0.2
value = self.weighted_choice(value_weights)(precision)
# Randomly determine the placement of the decimal mark.
# The smallest index where the decimal mark can be placed in the number string.
min_dot_location = max(len(value) - scale, 0)
# The largest index where the decimal mark can be placed in the number string.
max_dot_location = min(precision - scale, len(value))
dot_location = random.randint(min_dot_location, max_dot_location)
if dot_location == 0:
value = '0.' + value
elif dot_location == len(value):
pass
else:
value = value[:dot_location] + '.' + value[dot_location:]
if random.random() < 0.5:
# Negate the number.
value = '-' + value
return (value, precision, scale)
def result_equals(self, expected, actual):
'''Verify that the expected result is equal to the actual result. We verify equality
by rounding the expected result to different numbers of places and verifying that the
actual result is matched in at least one of the cases.'''
if actual == expected:
return True
if actual is None:
# Overflow
if abs(expected) > decimal.Decimal("9" * 32):
# If the expected result is larger than 10^32 - 1, it's not unreasonable for
# there to be an overflow in Impala because the minimum scale is 6 and
# 38 (max precision) - 6 = 32.
return True
return False
for num_digits_after_dot in xrange(39):
# Reduce the number of digits after the dot in the expected_result to different
# amounts. If it matches the actual result in at least one of the cases, we
# consider the actual result to be acceptable.
truncated_expected = expected.quantize(
decimal.Decimal("1e-{0}".format(num_digits_after_dot)),
rounding=decimal.ROUND_HALF_UP)
if actual == truncated_expected:
return True
return False
def execute_one_decimal_op(self):
'''Executes a single query and compares the result to a result that we computed in
Python.'''
op = random.choice(['+', '-', '*', '/', '%'])
value1, precision1, scale1 = self.get_decimal()
value2, precision2, scale2 = self.get_decimal()
query = ('select cast({value1} as decimal({precision1},{scale1})) {op} '
'cast({value2} as decimal({precision2},{scale2}))').format(op=op,
value1=value1, precision1=precision1, scale1=scale1,
value2=value2, precision2=precision2, scale2=scale2)
try:
result = self.execute_scalar(query, query_options={'decimal_v2': 'true'})
except ImpalaBeeswaxException as e:
result = None
if result is not None:
result = decimal.Decimal(result)
with decimal.localcontext() as ctx:
# Set the decimal context to a large precision initially, so that the
# mathematical operations are performed at a high precision.
ctx.prec = 80
try:
if op == '+':
expected_result = decimal.Decimal(value1) + decimal.Decimal(value2)
elif op == '-':
expected_result = decimal.Decimal(value1) - decimal.Decimal(value2)
elif op == '*':
expected_result = decimal.Decimal(value1) * decimal.Decimal(value2)
elif op == '/':
expected_result = decimal.Decimal(value1) / decimal.Decimal(value2)
elif op == '%':
expected_result = decimal.Decimal(value1) % decimal.Decimal(value2)
else:
assert False
except decimal.InvalidOperation as e:
expected_result = None
except decimal.DivisionByZero as e:
expected_result = None
assert self.result_equals(expected_result, result)
def test_decimal_ops(self, vector):
for _ in xrange(self.iterations):
self.execute_one_decimal_op()
def width_bucket(self, val, min_range, max_range, num_buckets):
# Multiplying the values by 10**40 guarantees that the numbers can be converted
# to int without losing information.
val_int = int(decimal.Decimal(val) * 10**40)
min_range_int = int(decimal.Decimal(min_range) * 10**40)
max_range_int = int(decimal.Decimal(max_range) * 10**40)
if min_range_int >= max_range_int:
return None
if val_int < min_range_int:
return 0
if val_int > max_range_int:
return num_buckets + 1
range_size = max_range_int - min_range_int
dist_from_min = val_int - min_range_int
return (num_buckets * dist_from_min) / range_size + 1
def execute_one_width_bucket(self):
val, val_prec, val_scale = self.get_decimal()
min_range, min_range_prec, min_range_scale = self.get_decimal()
max_range, max_range_prec, max_range_scale = self.get_decimal()
num_buckets = random.randint(1, 2147483647)
query = ('select width_bucket('
'cast({val} as decimal({val_prec},{val_scale})), '
'cast({min_range} as decimal({min_range_prec},{min_range_scale})), '
'cast({max_range} as decimal({max_range_prec},{max_range_scale})), '
'{num_buckets})')
query = query.format(val=val, val_prec=val_prec, val_scale=val_scale,
min_range=min_range, min_range_prec=min_range_prec,
min_range_scale=min_range_scale,
max_range=max_range, max_range_prec=max_range_prec,
max_range_scale=max_range_scale,
num_buckets=num_buckets)
expected_result = self.width_bucket(val, min_range, max_range, num_buckets)
if not expected_result:
return
try:
result = self.execute_scalar(query, query_options={'decimal_v2': 'true'})
assert int(result) == expected_result
except ImpalaBeeswaxException as e:
if "You need to wrap the arguments in a CAST" not in str(e):
# Sometimes the decimal inputs are incompatible with each other, so it's ok
# to ignore this error.
raise e
def test_width_bucket(self, vector):
for _ in xrange(self.iterations):
self.execute_one_width_bucket()