blob: 779397c7b74e6de70b39f45979318300e2f43fe8 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Targeted Impala tests for different tuple delimiters, field delimiters,
# and escape characters.
#
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.test_dimensions import (
create_single_exec_option_dimension,
create_uncompressed_text_dimension)
class TestDelimitedText(ImpalaTestSuite):
    """
    Tests delimited text files with different tuple delimiters, field delimiters
    and escape characters.
    """

    @classmethod
    def get_workload(cls):
        """Return the workload name used when building this suite's dimensions."""
        return 'functional-query'

    @classmethod
    def add_test_dimensions(cls):
        """Extend the inherited test matrix with a single exec-option dimension and
        an uncompressed-text dimension for this suite's workload."""
        super(TestDelimitedText, cls).add_test_dimensions()
        cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
        # Only run on delimited text with no compression.
        cls.ImpalaTestMatrix.add_dimension(
            create_uncompressed_text_dimension(cls.get_workload()))

    def test_delimited_text(self, vector, unique_database):
        """Run the 'QueryTest/delimited-text' test case against unique_database."""
        self.run_test_case('QueryTest/delimited-text', vector, unique_database)

    def test_delimited_text_newlines(self, vector, unique_database):
        """Test text with newlines in strings - IMPALA-1943.

        Queries are executed from Python to avoid issues with newline handling in
        the test file format.
        """
        # The SQL literals below are kept flush-left so the statement text that is
        # sent stays exactly as it was. Python expands '\002' and '\001' into the
        # raw control characters before the statement is issued.
        self.execute_query_expect_success(self.client, """
create table if not exists %s.nl_queries
(c1 string, c2 string, c3 string)
row format delimited
fields terminated by '\002'
lines terminated by '\001'
stored as textfile
""" % unique_database)

        # Create test data with newlines in various places. Each "\\n" reaches the
        # SQL text as a backslash followed by 'n'; the assertions below expect it
        # to come back as a real newline character.
        self.execute_query_expect_success(self.client, """
insert into %s.nl_queries
values ("the\\n","\\nquick\\nbrown","fox\\n"),
("\\njumped","over the lazy\\n","\\ndog")""" % unique_database)

        # Both rows must come back intact, with embedded newlines preserved and
        # columns separated by tabs in the result data.
        result = self.execute_query("select * from %s.nl_queries" % unique_database)
        assert len(result.data) == 2
        assert result.data[0].split("\t") == ["the\n", "\nquick\nbrown", "fox\n"]
        assert result.data[1].split("\t") == ["\njumped", "over the lazy\n", "\ndog"]

        # The row count may be computed without parsing each row, so could be
        # inconsistent with the row data checked above; verify it separately.
        result = self.execute_query(
            "select count(*) from %s.nl_queries" % unique_database)
        assert len(result.data) == 1
        assert result.data[0] == "2"

    def test_delimited_text_latin_chars(self, vector, unique_database):
        """Verifies Impala is able to properly handle delimited text that contains
        extended ASCII/latin characters.

        Runs the 'QueryTest/delimited-latin-text' test case against
        unique_database with encoding="latin-1".
        """
        self.run_test_case('QueryTest/delimited-latin-text', vector, unique_database,
                           encoding="latin-1")