// blob: c12e82c6efe3f68b03c00971d12a16fbdda4bff9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.tool;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
import org.apache.carbondata.core.constants.CarbonVersionConstants;
import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.datatype.Field;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.sdk.file.*;
import org.apache.commons.io.FileUtils;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
public class CarbonCliTest {
private String path = "./CarbonCliTest";
private String pathBinary = "./CarbonCliTestBinary";
private String buildLines(String... lines) {
ByteArrayOutputStream expectedOut = null;
PrintStream expectedStream = null;
try {
expectedOut = new ByteArrayOutputStream();
expectedStream = new PrintStream(expectedOut);
for (String line : lines) {
expectedStream.println(line);
}
return new String(expectedOut.toByteArray());
} finally {
CarbonUtil.closeStreams(expectedStream, expectedOut);
}
}
@Before
public void before() throws IOException {
FileUtils.deleteDirectory(new File(path));
Field[] fields = new Field[2];
fields[0] = new Field("name", DataTypes.STRING);
fields[1] = new Field("age", DataTypes.INT);
TestUtil.writeFilesAndVerify(5000000, new Schema(fields), path, new String[]{"name"}, 3, 8);
TestUtil.writeFilesAndVerify(5000000, new Schema(fields), path, new String[]{"name"}, 3, 8);
}
public void buildBinaryData(int rows, Schema schema, String path, String[] sortColumns,
int blockletSize, int blockSize)
throws IOException, InvalidLoadOptionException {
CarbonWriterBuilder builder = CarbonWriter.builder()
.outputPath(path);
if (sortColumns != null) {
builder = builder.sortBy(sortColumns);
}
if (blockletSize != -1) {
builder = builder.withBlockletSize(blockletSize);
}
if (blockSize != -1) {
builder = builder.withBlockSize(blockSize);
}
CarbonWriter writer = builder.withCsvInput(schema).writtenBy("TestUtil").build();
for (int i = 0; i < rows; i++) {
writer.write(new String[]{
"robot" + (i % 10), String.valueOf(i % 3000000), String.valueOf((double) i / 2)});
}
for (int i = 0; i < rows; i++) {
writer.write(new String[]{
"robot" + (i % 10), String.valueOf(i % 3000000), String.valueOf("robot" + i / 2)});
}
writer.close();
}
@Test
public void testInvalidCmd() {
String[] args = {"-cmd", "DD", "-p", path};
ByteArrayOutputStream out = new ByteArrayOutputStream();
PrintStream stream = new PrintStream(out);
CarbonCli.run(args, stream);
String output = new String(out.toByteArray());
Assert.assertTrue(output.contains("command DD is not supported"));
String[] args2 = {"-p", path};
out = new ByteArrayOutputStream();
stream = new PrintStream(out);
CarbonCli.run(args2, stream);
output = new String(out.toByteArray());
Assert.assertTrue(output.contains("Parsing failed. Reason: Missing required option: cmd"));
}
@Test
public void testSummaryOutputIndividual() {
String[] args = {"-cmd", "summary", "-p", path};
ByteArrayOutputStream out = new ByteArrayOutputStream();
PrintStream stream = new PrintStream(out);
CarbonCli.run(args, stream);
String output = new String(out.toByteArray());
String expectedOutput = buildLines(
"Input Folder: ./CarbonCliTest" ,
"## Summary",
"total: 6 blocks, 2 shards, 12 blocklets, 314 pages, 10,000,000 rows, 28.68MB",
"avg: 4.78MB/block, 2.39MB/blocklet, 1,666,666 rows/block, 833,333 rows/blocklet");
Assert.assertTrue(output.contains(expectedOutput));
String[] args2 = {"-cmd", "summary", "-p", path, "-s"};
out = new ByteArrayOutputStream();
stream = new PrintStream(out);
CarbonCli.run(args2, stream);
output = new String(out.toByteArray());
expectedOutput = buildLines(
"Column Name Data Type Column Type SortColumn Encoding Ordinal Id ",
"name STRING dimension true [] 0 NA ",
"age INT measure false [] 1 NA ");
Assert.assertTrue(output.contains(expectedOutput));
String[] args3 = {"-cmd", "summary", "-p", path, "-t"};
out = new ByteArrayOutputStream();
stream = new PrintStream(out);
CarbonCli.run(args3, stream);
output = new String(out.toByteArray());
expectedOutput = buildLines(
"## Table Properties",
"schema file not found");
Assert.assertTrue(output.contains(expectedOutput));
String[] args4 = {"-cmd", "summary", "-p", path, "-b", "7"};
out = new ByteArrayOutputStream();
stream = new PrintStream(out);
CarbonCli.run(args4, stream);
output = new String(out.toByteArray());
expectedOutput = buildLines(
"BLK BLKLT NumPages NumRows Size " ,
"0 0 28 896,000 2.57MB " ,
"0 1 28 896,000 2.57MB " ,
"1 0 28 896,000 2.57MB " ,
"1 1 28 896,000 2.57MB " ,
"2 0 28 896,000 2.57MB " ,
"2 1 17 520,000 1.49MB ");
Assert.assertTrue(output.contains(expectedOutput));
String[] args5 = {"-cmd", "summary", "-p", path, "-c", "name"};
out = new ByteArrayOutputStream();
stream = new PrintStream(out);
CarbonCli.run(args5, stream);
output = new String(out.toByteArray());
expectedOutput = buildLines(
"BLK BLKLT Meta Size Data Size LocalDict DictEntries DictSize AvgPageSize Min% Max% Min Max " ,
"0 0 1.90KB 2.15KB true 2 18.0B 9.0B NA NA robot0 robot1 " ,
"0 1 1.90KB 2.16KB true 3 22.0B 9.0B NA NA robot1 robot3 " ,
"1 0 1.90KB 2.16KB true 3 22.0B 9.0B NA NA robot3 robot5 " ,
"1 1 1.90KB 2.16KB true 3 22.0B 9.0B NA NA robot5 robot7 " ,
"2 0 1.90KB 2.14KB true 2 18.0B 9.0B NA NA robot7 robot8 " ,
"2 1 1.18KB 1.33KB true 2 18.0B 9.0B NA NA robot8 robot9 ");
Assert.assertTrue(output.contains(expectedOutput));
}
@Test
public void testSortColumnsOfSegmentFolder() {
String[] args = {"-cmd", "sort_columns", "-p", path};
ByteArrayOutputStream out = new ByteArrayOutputStream();
PrintStream stream = new PrintStream(out);
CarbonCli.run(args, stream);
String output = new String(out.toByteArray());
String expectedOutput = buildLines(
"Input Folder: ./CarbonCliTest",
"sorted by name");
Assert.assertTrue(output.contains(expectedOutput));
}
@Test
public void testSummaryOutputAll() {
String[] args = {"-cmd", "summary", "-p", path, "-a", "-c", "age"};
ByteArrayOutputStream out = new ByteArrayOutputStream();
PrintStream stream = new PrintStream(out);
CarbonCli.run(args, stream);
String output = new String(out.toByteArray());
String expectedOutput = buildLines(
"Input Folder: ./CarbonCliTest" ,
"## Summary",
"total: 6 blocks, 2 shards, 12 blocklets, 314 pages, 10,000,000 rows, 28.68MB",
"avg: 4.78MB/block, 2.39MB/blocklet, 1,666,666 rows/block, 833,333 rows/blocklet");
Assert.assertTrue(output.contains(expectedOutput));
expectedOutput = buildLines(
"Column Name Data Type Column Type SortColumn Encoding Ordinal Id ",
"name STRING dimension true [] 0 NA ",
"age INT measure false [] 1 NA ");
Assert.assertTrue(output.contains(expectedOutput));
expectedOutput = buildLines(
"## Table Properties",
"schema file not found");
Assert.assertTrue(output.contains(expectedOutput));
expectedOutput = buildLines(
"BLK BLKLT NumPages NumRows Size ",
"0 0 28 896,000 2.57MB ",
"0 1 28 896,000 2.57MB ",
"1 0 28 896,000 2.57MB ",
"1 1 28 896,000 2.57MB ");
Assert.assertTrue(output.contains(expectedOutput));
expectedOutput = buildLines(
"BLK BLKLT Meta Size Data Size LocalDict DictEntries DictSize AvgPageSize Min% Max% Min Max " ,
"0 0 3.36KB 2.57MB false 0 0.0B 93.76KB 0.0 100.0 0 2999990 " ,
"0 1 3.36KB 2.57MB false 0 0.0B 93.76KB 0.0 100.0 1 2999992 " ,
"1 0 3.36KB 2.57MB false 0 0.0B 93.76KB 0.0 100.0 3 2999994 " ,
"1 1 3.36KB 2.57MB false 0 0.0B 93.76KB 0.0 100.0 5 2999996 " ,
"2 0 3.36KB 2.57MB false 0 0.0B 93.76KB 0.0 100.0 7 2999998 " ,
"2 1 2.04KB 1.49MB false 0 0.0B 89.62KB 0.0 100.0 9 2999999 ");
Assert.assertTrue(output.contains(expectedOutput));
Assert.assertTrue(output.contains("## version Details"));
Assert.assertTrue(output.contains("written_by Version"));
Assert.assertTrue(output.contains("TestUtil "+ CarbonVersionConstants.CARBONDATA_VERSION));
}
@Test
public void testSummaryPageMeta() {
String[] args = { "-cmd", "summary", "-p", path, "-c", "name", "-k"};
ByteArrayOutputStream out = new ByteArrayOutputStream();
PrintStream stream = new PrintStream(out);
CarbonCli.run(args, stream);
String output = new String(out.toByteArray());
String expectedOutput = buildLines(
"Blocklet 0:",
"Page 0 (offset 0, length 9): DataChunk2(chunk_meta:ChunkCompressionMeta(compression_codec:DEPRECATED, total_uncompressed_size:96000, total_compressed_size:9, compressor_name:snappy), rowMajor:false, data_page_length:5, rle_page_length:4, presence:PresenceMeta(represents_presence:false, present_bit_stream:00), sort_state:SORT_NATIVE, encoders:[RLE], encoder_meta:[], min_max:BlockletMinMaxIndex(min_values:[72 6F 62 6F 74 30], max_values:[72 6F 62 6F 74 30], min_max_presence:[true]), numberOfRowsInpage:32000)");
Assert.assertTrue(output.contains(expectedOutput));
}
@Test
public void testSummaryAllColumns() {
String[] args = { "-cmd", "summary", "-p", path, "-C" };
ByteArrayOutputStream out = new ByteArrayOutputStream();
PrintStream stream = new PrintStream(out);
CarbonCli.run(args, stream);
String output = new String(out.toByteArray());
Assert.assertTrue(output.contains("Block Blocklet Column Name Meta Size Data Size"));
}
@Test
public void testSummaryAllColumnsForOneFile() {
CarbonFile folder = FileFactory.getCarbonFile(path);
CarbonFile[] carbonFiles = folder.listFiles(new CarbonFileFilter() {
@Override
public boolean accept(CarbonFile file) {
return file.getName().endsWith(CarbonTablePath.CARBON_DATA_EXT);
}
});
String[] args = { "-cmd", "summary", "-p", carbonFiles[0].getCanonicalPath(), "-C" };
ByteArrayOutputStream out = new ByteArrayOutputStream();
PrintStream stream = new PrintStream(out);
CarbonCli.run(args, stream);
String output = new String(out.toByteArray());
Assert.assertTrue(output.contains("Block Blocklet Column Name Meta Size Data Size"));
}
@Test
public void testBenchmark() {
String[] args = {"-cmd", "benchmark", "-p", path, "-a", "-c", "name"};
ByteArrayOutputStream out = new ByteArrayOutputStream();
PrintStream stream = new PrintStream(out);
CarbonCli.run(args, stream);
}
@Test
public void testBinary() throws IOException, InvalidLoadOptionException {
FileUtils.deleteDirectory(new File(pathBinary));
Field[] fields = new Field[3];
fields[0] = new Field("name", DataTypes.STRING);
fields[1] = new Field("age", DataTypes.INT);
fields[2] = new Field("binaryField", DataTypes.BINARY);
buildBinaryData(5000000, new Schema(fields), pathBinary, new String[]{"name"}, 3, 8);
String[] args = {"-cmd", "summary", "-p", pathBinary};
ByteArrayOutputStream out = new ByteArrayOutputStream();
PrintStream stream = new PrintStream(out);
CarbonCli.run(args, stream);
String[] args2 = {"-cmd", "summary", "-p", pathBinary, "-s"};
out = new ByteArrayOutputStream();
stream = new PrintStream(out);
CarbonCli.run(args2, stream);
String output = new String(out.toByteArray());
Assert.assertTrue(output.contains("binaryfield") && output.contains("BINARY"));
FileUtils.deleteDirectory(new File(pathBinary));
}
@After
public void after() throws IOException {
FileUtils.deleteDirectory(new File(path));
FileUtils.deleteDirectory(new File(pathBinary));
}
}