blob: a6dff740bc1009de3f2c9fab65c8c245562a7ef2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill;
import static java.lang.String.format;
import static org.apache.drill.test.TestBuilder.listOf;
import static org.hamcrest.CoreMatchers.containsString;
import static org.junit.Assert.assertThat;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import org.apache.drill.categories.SqlTest;
import org.apache.drill.common.exceptions.UserRemoteException;
import org.apache.drill.exec.store.dfs.WorkspaceSchemaFactory;
import org.apache.drill.test.BaseTestQuery;
import org.apache.drill.test.TestBuilder;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category(SqlTest.class)
public class TestSelectWithOption extends BaseTestQuery {
private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(WorkspaceSchemaFactory.class);
private File genCSVFile(String name, String... rows) throws IOException {
File file = new File(format("%s/%s.csv", dirTestWatcher.getRootDir(), name));
try (FileWriter fw = new FileWriter(file)) {
for (int i = 0; i < rows.length; i++) {
fw.append(rows[i] + "\n");
}
}
return file;
}
private String genCSVTable(String name, String... rows) throws IOException {
File f = genCSVFile(name, rows);
return format("dfs.`%s`", f.getName());
}
private void testWithResult(String query, Object... expectedResult) throws Exception {
TestBuilder builder = testBuilder()
.sqlQuery(query)
.ordered()
.baselineColumns("columns");
for (Object o : expectedResult) {
builder = builder.baselineValues(o);
}
builder.build().run();
}
@Test
public void testTextFieldDelimiter() throws Exception {
String tableName = genCSVTable("testTextFieldDelimiter",
"\"b\"|\"0\"",
"\"b\"|\"1\"",
"\"b\"|\"2\"");
String queryTemplate =
"select columns from table(%s (type => 'TeXT', fieldDelimiter => '%s'))";
testWithResult(format(queryTemplate, tableName, ","),
listOf("b\"|\"0"),
listOf("b\"|\"1"),
listOf("b\"|\"2")
);
testWithResult(format(queryTemplate, tableName, "|"),
listOf("b", "0"),
listOf("b", "1"),
listOf("b", "2")
);
}
@Test
public void testTabFieldDelimiter() throws Exception {
String tableName = genCSVTable("testTabFieldDelimiter",
"1\ta",
"2\tb");
String fieldDelimiter = new String(new char[]{92, 116}); // represents \t
testWithResult(format("select columns from table(%s(type=>'TeXT', fieldDelimiter => '%s'))", tableName, fieldDelimiter),
listOf("1", "a"),
listOf("2", "b"));
}
@Test
public void testSingleTextLineDelimiter() throws Exception {
String tableName = genCSVTable("testSingleTextLineDelimiter",
"a|b|c");
testWithResult(format("select columns from table(%s(type => 'TeXT', lineDelimiter => '|'))", tableName),
listOf("a"),
listOf("b"),
listOf("c"));
}
@Test
// '\n' is treated as standard delimiter
// if user has indicated custom line delimiter but input file contains '\n', split will occur on both
public void testCustomTextLineDelimiterAndNewLine() throws Exception {
String tableName = genCSVTable("testTextLineDelimiter",
"b|1",
"b|2");
testWithResult(format("select columns from table(%s(type => 'TeXT', lineDelimiter => '|'))", tableName),
listOf("b"),
listOf("1"),
listOf("b"),
listOf("2"));
}
@Test
public void testTextLineDelimiterWithCarriageReturn() throws Exception {
String tableName = genCSVTable("testTextLineDelimiterWithCarriageReturn",
"1, a\r",
"2, b\r");
String lineDelimiter = new String(new char[]{92, 114, 92, 110}); // represents \r\n
testWithResult(format("select columns from table(%s(type=>'TeXT', lineDelimiter => '%s'))", tableName, lineDelimiter),
listOf("1, a"),
listOf("2, b"));
}
@Test
public void testMultiByteLineDelimiter() throws Exception {
String tableName = genCSVTable("testMultiByteLineDelimiter",
"1abc2abc3abc");
testWithResult(format("select columns from table(%s(type=>'TeXT', lineDelimiter => 'abc'))", tableName),
listOf("1"),
listOf("2"),
listOf("3"));
}
@Test
public void testDataWithPartOfMultiByteLineDelimiter() throws Exception {
String tableName = genCSVTable("testDataWithPartOfMultiByteLineDelimiter",
"ab1abc2abc3abc");
testWithResult(format("select columns from table(%s(type=>'TeXT', lineDelimiter => 'abc'))", tableName),
listOf("ab1"),
listOf("2"),
listOf("3"));
}
@Test
public void testTextQuote() throws Exception {
String tableName = genCSVTable("testTextQuote",
"\"b\"|\"0\"",
"\"b\"|\"1\"",
"\"b\"|\"2\"");
testWithResult(format("select columns from table(%s(type => 'TeXT', fieldDelimiter => '|', quote => '@'))", tableName),
listOf("\"b\"", "\"0\""),
listOf("\"b\"", "\"1\""),
listOf("\"b\"", "\"2\"")
);
String quoteTableName = genCSVTable("testTextQuote2",
"@b@|@0@",
"@b$@c@|@1@");
// It seems that a parameter can not be called "escape"
testWithResult(format("select columns from table(%s(`escape` => '$', type => 'TeXT', fieldDelimiter => '|', quote => '@'))", quoteTableName),
listOf("b", "0"),
listOf("b$@c", "1") // shouldn't $ be removed here?
);
}
@Test
public void testTextComment() throws Exception {
String commentTableName = genCSVTable("testTextComment",
"b|0",
"@ this is a comment",
"b|1");
testWithResult(format("select columns from table(%s(type => 'TeXT', fieldDelimiter => '|', comment => '@'))", commentTableName),
listOf("b", "0"),
listOf("b", "1")
);
}
@Test
public void testTextHeader() throws Exception {
String headerTableName = genCSVTable("testTextHeader",
"b|a",
"b|0",
"b|1");
testWithResult(format("select columns from table(%s(type => 'TeXT', fieldDelimiter => '|', skipFirstLine => true))", headerTableName),
listOf("b", "0"),
listOf("b", "1")
);
testBuilder()
.sqlQuery(format("select a, b from table(%s(type => 'TeXT', fieldDelimiter => '|', extractHeader => true))", headerTableName))
.ordered()
.baselineColumns("b", "a")
.baselineValues("b", "0")
.baselineValues("b", "1")
.build().run();
}
@Test
public void testVariationsCSV() throws Exception {
String csvTableName = genCSVTable("testVariationsCSV",
"a,b",
"c|d");
// Using the defaults in TextFormatConfig (the field delimiter is neither "," not "|")
String[] csvQueries = {
// format("select columns from %s ('TeXT')", csvTableName),
// format("select columns from %s('TeXT')", csvTableName),
format("select columns from table(%s ('TeXT'))", csvTableName),
format("select columns from table(%s (type => 'TeXT'))", csvTableName),
// format("select columns from %s (type => 'TeXT')", csvTableName)
};
for (String csvQuery : csvQueries) {
testWithResult(csvQuery,
listOf("a,b"),
listOf("c|d"));
}
// the drill config file binds .csv to "," delimited
testWithResult(format("select columns from %s", csvTableName),
listOf("a", "b"),
listOf("c|d"));
// setting the delimiter
testWithResult(format("select columns from table(%s (type => 'TeXT', fieldDelimiter => ','))", csvTableName),
listOf("a", "b"),
listOf("c|d"));
testWithResult(format("select columns from table(%s (type => 'TeXT', fieldDelimiter => '|'))", csvTableName),
listOf("a,b"),
listOf("c", "d"));
}
@Test
public void testVariationsJSON() throws Exception {
String jsonTableName = genCSVTable("testVariationsJSON",
"{\"columns\": [\"f\",\"g\"]}");
// the extension is actually csv
testWithResult(format("select columns from %s", jsonTableName),
listOf("{\"columns\": [\"f\"", "g\"]}\n")
);
String[] jsonQueries = {
format("select columns from table(%s ('JSON'))", jsonTableName),
format("select columns from table(%s(type => 'JSON'))", jsonTableName),
// we can use named format plugin configurations too!
format("select columns from table(%s(type => 'Named', name => 'json'))", jsonTableName),
};
for (String jsonQuery : jsonQueries) {
testWithResult(jsonQuery, listOf("f","g"));
}
}
@Test
public void testUse() throws Exception {
File f = genCSVFile("testUse",
"{\"columns\": [\"f\",\"g\"]}");
String jsonTableName = String.format("dfs.`%s`", f.getName());
// the extension is actually csv
test("use dfs");
try {
String[] jsonQueries = {
format("select columns from table(%s ('JSON'))", jsonTableName),
format("select columns from table(%s(type => 'JSON'))", jsonTableName),
};
for (String jsonQuery : jsonQueries) {
testWithResult(jsonQuery, listOf("f","g"));
}
testWithResult(format("select length(columns[0]) as columns from table(%s ('JSON'))", jsonTableName), 1L);
} finally {
test("use sys");
}
}
@Test(expected = UserRemoteException.class)
public void testAbsentTable() throws Exception {
String schema = "cp.default";
String tableName = "absent_table";
try {
test("select * from table(`%s`.`%s`(type=>'parquet'))", schema, tableName);
} catch (UserRemoteException e) {
assertThat(e.getMessage(), containsString(String.format("Unable to find table [%s] in schema [%s]", tableName, schema)));
throw e;
}
}
}