// blob: 55ab6f83d31d41e58e351aa159c432dc88b1d378 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "orc/OrcFile.hh"
#include "Adaptor.hh"
#include "ToolTest.hh"
#include "wrap/gmock.h"
#include "wrap/gtest-wrapper.h"
// Runs tools/src/orc-contents on TestOrcFile.test1.orc with no options and
// expects both rows, with every column, rendered as one JSON object per line.
TEST(TestFileContents, testRaw) {
  const std::string orcContents = findProgram("tools/src/orc-contents");
  const std::string exampleFile = findExample("TestOrcFile.test1.orc");

  // Two newline-terminated rows; adjacent literals are concatenated by the
  // compiler, so the line breaks below are purely for source formatting.
  const std::string expectedRows =
      "{\"boolean1\": false, \"byte1\": 1, \"short1\": 1024, \"int1\": 65536, "
      "\"long1\": 9223372036854775807, \"float1\": 1, \"double1\": -15,"
      " \"bytes1\": [0, 1, 2, 3, 4], \"string1\": \"hi\", \"middle\": "
      "{\"list\": [{\"int1\": 1, \"string1\": \"bye\"}, {\"int1\": 2, "
      "\"string1\": \"sigh\"}]}, \"list\": [{\"int1\": 3, \"string1\": "
      "\"good\"}, {\"int1\": 4, \"string1\": \"bad\"}], \"map\": []}\n"
      "{\"boolean1\": true, \"byte1\": 100, \"short1\": 2048, \"int1\": 65536,"
      " \"long1\": 9223372036854775807, \"float1\": 2, \"double1\": -5, "
      "\"bytes1\": [], \"string1\": \"bye\", \"middle\": {\"list\": "
      "[{\"int1\": 1, \"string1\": \"bye\"}, {\"int1\": 2, \"string1\":"
      " \"sigh\"}]}, \"list\": [{\"int1\": 100000000, \"string1\": \"cat\"},"
      " {\"int1\": -100000, \"string1\": \"in\"}, {\"int1\": 1234, "
      "\"string1\": \"hat\"}], \"map\": [{\"key\": \"chani\", \"value\": "
      "{\"int1\": 5, \"string1\": \"chani\"}}, {\"key\": \"mauddib\", "
      "\"value\": {\"int1\": 1, \"string1\": \"mauddib\"}}]}\n";

  // NOTE(review): runProgram is declared outside this file; presumably it
  // fills the two strings with the program's stdout and stderr -- confirm.
  std::string capturedOut;
  std::string capturedErr;
  const int exitStatus = runProgram({orcContents, exampleFile}, capturedOut, capturedErr);

  EXPECT_EQ(0, exitStatus);
  EXPECT_EQ(expectedRows, capturedOut);
  EXPECT_EQ("", capturedErr);
}
// Runs tools/src/orc-contents on TestOrcFile.test1.orc nine times, selecting
// columns through --columns/-c, --columnTypeIds/-t and --columnNames/-n in
// each accepted spelling, and expects the same four-column output every time.
TEST(TestFileContents, testSelectedColumns) {
  const std::string pgm = findProgram("tools/src/orc-contents");
  const std::string file = findExample("TestOrcFile.test1.orc");
  const std::string columnFields = "1,3,5,7";
  const std::string columnTypeIds = "2,4,6,8";
  const std::string columnNames = "byte1,int1,float1,bytes1";
  const std::string expected =
      "{\"byte1\": 1, \"int1\": 65536, \"float1\": 1, \"bytes1\": [0, 1, 2, 3, 4]}\n"
      "{\"byte1\": 100, \"int1\": 65536, \"float1\": 2, \"bytes1\": []}\n";
  std::string output;
  std::string error;

  // Shared checks for a single invocation. The runProgram call is passed as
  // an argument, so it has completed (and written `output` / `error`) before
  // this body runs. Every invocation reports from these same source lines, so
  // `optionForm` is streamed into each assertion to identify which spelling
  // failed.
  const auto expectSelection = [&](const char* optionForm, int exitStatus) {
    EXPECT_EQ(0, exitStatus) << "option form: " << optionForm;
    EXPECT_EQ(expected, output) << "option form: " << optionForm;
    EXPECT_EQ("", error) << "option form: " << optionForm;
  };

  // Selection via --columns / -c.
  expectSelection("--columns=<list>",
                  runProgram({pgm, "--columns=" + columnFields, file}, output, error));
  expectSelection("--columns <list>",
                  runProgram({pgm, "--columns", columnFields, file}, output, error));
  expectSelection("-c <list>", runProgram({pgm, "-c", columnFields, file}, output, error));

  // Selection via --columnTypeIds / -t.
  expectSelection("--columnTypeIds=<list>",
                  runProgram({pgm, "--columnTypeIds=" + columnTypeIds, file}, output, error));
  expectSelection("--columnTypeIds <list>",
                  runProgram({pgm, "--columnTypeIds", columnTypeIds, file}, output, error));
  expectSelection("-t <list>", runProgram({pgm, "-t", columnTypeIds, file}, output, error));

  // Selection via --columnNames / -n.
  expectSelection("--columnNames=<list>",
                  runProgram({pgm, "--columnNames=" + columnNames, file}, output, error));
  expectSelection("--columnNames <list>",
                  runProgram({pgm, "--columnNames", columnNames, file}, output, error));
  expectSelection("-n <list>", runProgram({pgm, "-n", columnNames, file}, output, error));
}
// Runs tools/src/orc-contents on complextypes_iceberg.orc six times, selecting
// `id` plus two members of `nested_struct` through --columnTypeIds/-t and
// --columnNames/-n in each accepted spelling, and expects the same single row
// every time.
TEST(TestFileContents, testNestedColumns) {
  const std::string pgm = findProgram("tools/src/orc-contents");
  const std::string file = findExample("complextypes_iceberg.orc");
  const std::string columnTypeIds = "1,15,16";
  const std::string columnNames = "id,nested_struct.a,nested_struct.b";
  const std::string expected = "{\"id\": 8, \"nested_struct\": {\"a\": -1, \"b\": [-1]}}\n";
  std::string output;
  std::string error;

  // Shared checks for a single invocation. The runProgram call is passed as
  // an argument, so it has completed (and written `output` / `error`) before
  // this body runs. Every invocation reports from these same source lines, so
  // `optionForm` is streamed into each assertion to identify which spelling
  // failed.
  const auto expectSelection = [&](const char* optionForm, int exitStatus) {
    EXPECT_EQ(0, exitStatus) << "option form: " << optionForm;
    EXPECT_EQ(expected, output) << "option form: " << optionForm;
    EXPECT_EQ("", error) << "option form: " << optionForm;
  };

  // Selection via --columnTypeIds / -t.
  expectSelection("--columnTypeIds=<list>",
                  runProgram({pgm, "--columnTypeIds=" + columnTypeIds, file}, output, error));
  expectSelection("--columnTypeIds <list>",
                  runProgram({pgm, "--columnTypeIds", columnTypeIds, file}, output, error));
  expectSelection("-t <list>", runProgram({pgm, "-t", columnTypeIds, file}, output, error));

  // Selection via --columnNames / -n (nested members use dotted paths).
  expectSelection("--columnNames=<list>",
                  runProgram({pgm, "--columnNames=" + columnNames, file}, output, error));
  expectSelection("--columnNames <list>",
                  runProgram({pgm, "--columnNames", columnNames, file}, output, error));
  expectSelection("-n <list>", runProgram({pgm, "-n", columnNames, file}, output, error));
}
// Runs tools/src/orc-contents on TestOrcFile.test1.orc with -n "byte1,abc"
// and expects exit status 1, nothing in `output`, and an error text that
// contains the "Invalid column selected abc" message listing the valid names.
TEST(TestFileContents, testInvalidName) {
  const std::string pgm = findProgram("tools/src/orc-contents");
  const std::string file = findExample("TestOrcFile.test1.orc");
  const std::string error_msg =
      "Invalid column selected abc. Valid names are boolean1, byte1, bytes1, double1, "
      "float1, int1, list, list.int1, list.string1, long1, map, map.int1, map.string1, "
      "middle, middle.list, middle.list.int1, middle.list.string1, short1, string1";
  std::string output;
  std::string error;
  EXPECT_EQ(1, runProgram({pgm, "-n", "byte1,abc", file}, output, error));
  EXPECT_EQ("", output);
  // Only a substring match is required. On failure the comparison itself
  // prints just two size_t values, so the captured error text is streamed in
  // to show what was actually reported.
  EXPECT_NE(std::string::npos, error.find(error_msg)) << "actual error text: " << error;
}
// Runs tools/src/orc-contents on decimal64_v2.orc and expects ten rows on
// `output` plus exactly one warning in `error` saying the file was written in
// the unknown format version UNSTABLE-PRE-2.0.
TEST(TestFileContents, testDecimal64V2) {
  const std::string orcContents = findProgram("tools/src/orc-contents");
  const std::string exampleFile = findExample("decimal64_v2.orc");

  // The warning embeds the example path exactly as findExample returned it.
  const std::string expectedWarning = "Warning: ORC file " + exampleFile +
                                      " was written in an unknown format version UNSTABLE-PRE-2.0\n";

  // One JSON object per row, each terminated by a newline.
  const std::string expectedRows =
      "{\"a\": 17292380420, \"b\": 24, \"c\": 36164.16, \"d\": 0.03, \"e\": 0.01}\n"
      "{\"a\": 17292380421, \"b\": 38, \"c\": 63351.70, \"d\": 0.08, \"e\": 0.01}\n"
      "{\"a\": 17292380421, \"b\": 28, \"c\": 42673.96, \"d\": 0.09, \"e\": 0.06}\n"
      "{\"a\": 17292380421, \"b\": 40, \"c\": 76677.60, \"d\": 0.05, \"e\": 0.04}\n"
      "{\"a\": 17292380421, \"b\": 2, \"c\": 2096.48, \"d\": 0.07, \"e\": 0.07}\n"
      "{\"a\": 17292380421, \"b\": 42, \"c\": 45284.82, \"d\": 0.07, \"e\": 0.05}\n"
      "{\"a\": 17292380421, \"b\": 10, \"c\": 18572.90, \"d\": 0.01, \"e\": 0.08}\n"
      "{\"a\": 17292380422, \"b\": 12, \"c\": 14836.80, \"d\": 0.09, \"e\": 0.06}\n"
      "{\"a\": 17292380422, \"b\": 41, \"c\": 82152.52, \"d\": 0.07, \"e\": 0.02}\n"
      "{\"a\": 17292380422, \"b\": 38, \"c\": 47240.84, \"d\": 0.10, \"e\": 0.00}\n";

  std::string capturedOut;
  std::string capturedErr;
  const int exitStatus = runProgram({orcContents, exampleFile}, capturedOut, capturedErr);

  // Stream the captured error text so an unexpected exit status is explained.
  EXPECT_EQ(0, exitStatus) << capturedErr;
  EXPECT_EQ(expectedRows, capturedOut);
  EXPECT_EQ(expectedWarning, capturedErr);
}