/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.carbondata.tool;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.carbondata.common.Strings;
import org.apache.carbondata.core.datastore.block.BlockletInfos;
import org.apache.carbondata.core.datastore.block.TableBlockInfo;
import org.apache.carbondata.core.datastore.chunk.AbstractRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.reader.CarbonDataReaderFactory;
import org.apache.carbondata.core.datastore.chunk.reader.DimensionColumnChunkReader;
import org.apache.carbondata.core.datastore.chunk.reader.MeasureColumnChunkReader;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.datastore.page.ColumnPage;
import org.apache.carbondata.core.memory.MemoryException;
import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
import org.apache.carbondata.core.util.DataFileFooterConverterV3;
import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.format.FileFooter3;
import org.apache.carbondata.format.FileHeader;

import org.apache.commons.cli.CommandLine;
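
/**
 * Benchmarks the scan path of a single CarbonData file: reading the file
 * header and footer, converting the footer, and optionally timing the column
 * chunk IO and page decompression for one column.
 */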
class ScanBenchmark implements Command {

  private String dataFolder;
  private DataFile file;
  private List<String> outPuts;

  ScanBenchmark(String dataFolder, List<String> outPuts) {
    this.dataFolder = dataFolder;
    this.outPuts = outPuts;
  }
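
  /**
   * Uses the file given by the "-f" option, or else the first data file found
   * in the data folder, then benchmarks header/footer reading and conversion
   * and, when "-c" names a column, per-blocklet chunk IO and decompression.
   */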
@Override
public void run(CommandLine line) throws IOException, MemoryException {
if (line.hasOption("f")) {
String filePath = line.getOptionValue("f");
file = new DataFile(FileFactory.getCarbonFile(filePath));
} else {
FileCollector collector = new FileCollector(outPuts);
collector.collectFiles(dataFolder);
if (collector.getNumDataFiles() == 0) {
return;
}
Map<String, DataFile> dataFiles = collector.getDataFiles();
Iterator<DataFile> iterator = dataFiles.values().iterator();
// use the first file and close the rest
file = iterator.next();
while (iterator.hasNext()) {
iterator.next().close();
}
}
outPuts.add("\n## Benchmark");

    final AtomicReference<FileHeader> fileHeaderRef = new AtomicReference<>();
    final AtomicReference<FileFooter3> fileFooterRef = new AtomicReference<>();
    final AtomicReference<DataFileFooter> convertedFooterRef = new AtomicReference<>();

    // benchmark read header and footer time
    benchmarkOperation("ReadHeaderAndFooter", new Operation() {
      @Override public void run() throws IOException, MemoryException {
        fileHeaderRef.set(file.readHeader());
        fileFooterRef.set(file.readFooter());
      }
    });
    final FileHeader fileHeader = fileHeaderRef.get();
    final FileFooter3 fileFooter = fileFooterRef.get();

    // benchmark convert footer
benchmarkOperation("ConvertFooter", new Operation() {
@Override public void run() throws IOException, MemoryException {
convertFooter(fileHeader, fileFooter);
}
});

    // benchmark read all meta and convert footer
benchmarkOperation("ReadAllMetaAndConvertFooter", new Operation() {
@Override public void run() throws IOException, MemoryException {
DataFileFooter footer = readAndConvertFooter(file);
convertedFooterRef.set(footer);
}
});

    if (line.hasOption("c")) {
String columnName = line.getOptionValue("c");
outPuts.add("\nScan column '" + columnName + "'");
final DataFileFooter footer = convertedFooterRef.get();
final AtomicReference<AbstractRawColumnChunk> columnChunk = new AtomicReference<>();
final int columnIndex = file.getColumnIndex(columnName);
final boolean dimension = file.getColumn(columnName).isDimensionColumn();
for (int i = 0; i < footer.getBlockletList().size(); i++) {
final int blockletId = i;
outPuts.add(String.format("Blocklet#%d: total size %s, %,d pages, %,d rows",
blockletId,
Strings.formatSize(file.getColumnDataSizeInBytes(blockletId, columnIndex)),
footer.getBlockletList().get(blockletId).getNumberOfPages(),
footer.getBlockletList().get(blockletId).getNumberOfRows()));
benchmarkOperation("\tColumnChunk IO", new Operation() {
@Override public void run() throws IOException, MemoryException {
columnChunk.set(readBlockletColumnChunkIO(footer, blockletId, columnIndex, dimension));
}
});
        // decompress with the reader type matching the column: dimension vs measure
        if (dimension) {
benchmarkOperation("\tDecompress Pages", new Operation() {
@Override public void run() throws IOException, MemoryException {
decompressDimensionPages(columnChunk.get(),
footer.getBlockletList().get(blockletId).getNumberOfPages());
}
});
} else {
benchmarkOperation("\tDecompress Pages", new Operation() {
@Override public void run() throws IOException, MemoryException {
decompressMeasurePages(columnChunk.get(),
footer.getBlockletList().get(blockletId).getNumberOfPages());
}
});
}
}
}

    file.close();
}
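
  /** A unit of work whose wall-clock time is measured by benchmarkOperation. */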
interface Operation {
void run() throws IOException, MemoryException;
}
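
  /**
   * Runs the given operation once and appends its elapsed wall-clock time,
   * in microseconds, to the output lines.
   */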
  private void benchmarkOperation(String opName, Operation op)
      throws IOException, MemoryException {
    long start = System.nanoTime();
    op.run();
    long end = System.nanoTime();
    outPuts.add(String.format("%s takes %,d us", opName, (end - start) / 1000));
  }
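
  /**
   * Builds a TableBlockInfo covering the whole file and reads and converts
   * its footer with the V3 footer converter, as a scan would when opening
   * the block.
   */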
private DataFileFooter readAndConvertFooter(DataFile file) throws IOException {
int numBlocklets = file.getNumBlocklet();
BlockletInfos blockletInfos = new BlockletInfos(numBlocklets, 0, numBlocklets);
String segmentId = CarbonTablePath.DataFileUtil.getSegmentNo(file.getFilePath());
TableBlockInfo blockInfo =
new TableBlockInfo(file.getFilePath(), file.getFooterOffset(),
segmentId, new String[]{"localhost"}, file.getFileSizeInBytes(),
blockletInfos, ColumnarFormatVersion.V3, new String[0]);
DataFileFooterConverterV3 converter = new DataFileFooterConverterV3();
return converter.readDataFileFooter(blockInfo);
}
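
  /** Converts an already-read Thrift header and footer into a DataFileFooter. */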
private DataFileFooter convertFooter(FileHeader fileHeader, FileFooter3 fileFooter) {
DataFileFooterConverterV3 converter = new DataFileFooterConverterV3();
return converter.convertDataFileFooter(fileHeader, fileFooter);
}
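
  // the reader created during the IO step is kept so the decompress step can reuse it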
private DimensionColumnChunkReader dimensionColumnChunkReader;
private MeasureColumnChunkReader measureColumnChunkReader;
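
  /**
   * Reads the raw (still compressed) chunk of one column in one blocklet,
   * using a dimension or measure chunk reader depending on the column type.
   */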
private AbstractRawColumnChunk readBlockletColumnChunkIO(
DataFileFooter footer, int blockletId, int columnIndex, boolean dimension)
throws IOException {
BlockletInfo blockletInfo = footer.getBlockletList().get(blockletId);
if (dimension) {
dimensionColumnChunkReader = CarbonDataReaderFactory.getInstance()
.getDimensionColumnChunkReader(ColumnarFormatVersion.V3, blockletInfo,
footer.getSegmentInfo().getColumnCardinality(), file.getFilePath(), false);
return dimensionColumnChunkReader.readRawDimensionChunk(file.getFileReader(), columnIndex);
} else {
columnIndex = columnIndex - file.numDimensions();
assert (columnIndex >= 0);
measureColumnChunkReader = CarbonDataReaderFactory.getInstance()
.getMeasureColumnChunkReader(ColumnarFormatVersion.V3, blockletInfo,
file.getFilePath(), false);
return measureColumnChunkReader.readRawMeasureChunk(file.getFileReader(), columnIndex);
}
}
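
  /** Decodes all pages of a raw dimension chunk into DimensionColumnPage objects. */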
private DimensionColumnPage[] decompressDimensionPages(
AbstractRawColumnChunk rawColumnChunk, int numPages) throws IOException, MemoryException {
DimensionColumnPage[] pages = new DimensionColumnPage[numPages];
for (int i = 0; i < pages.length; i++) {
pages[i] = dimensionColumnChunkReader.decodeColumnPage(
(DimensionRawColumnChunk) rawColumnChunk, i, null);
}
return pages;
}
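
  /** Decodes all pages of a raw measure chunk into ColumnPage objects. */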
private ColumnPage[] decompressMeasurePages(
AbstractRawColumnChunk rawColumnChunk, int numPages) throws IOException, MemoryException {
ColumnPage[] pages = new ColumnPage[numPages];
for (int i = 0; i < pages.length; i++) {
pages[i] = measureColumnChunkReader.decodeColumnPage(
(MeasureRawColumnChunk) rawColumnChunk, i, null);
}
return pages;
}
}