blob: f95e88efcc57dd2872839099253ed3976fff9bb4 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "orc/ColumnPrinter.hh"
#include "orc/Exceptions.hh"
#include <getopt.h>
#include <string>
#include <memory>
#include <iostream>
#include <string>
void printStatistics(const char *filename, bool withIndex) {
orc::ReaderOptions opts;
std::unique_ptr<orc::Reader> reader;
reader = orc::createReader(orc::readFile(std::string(filename)), opts);
// print out all selected columns statistics.
std::unique_ptr<orc::Statistics> colStats = reader->getStatistics();
std::cout << "File " << filename << " has "
<< colStats->getNumberOfColumns() << " columns" << std::endl;
for(uint32_t i=0; i < colStats->getNumberOfColumns(); ++i) {
std::cout << "*** Column " << i << " ***" << std::endl;
std::cout << colStats->getColumnStatistics(i)->toString() << std::endl;
}
// test stripe statistics
std::unique_ptr<orc::StripeStatistics> stripeStats;
std::cout << "File " << filename << " has " << reader->getNumberOfStripes()
<< " stripes" << std::endl;
if (reader->getNumberOfStripeStatistics() == 0) {
std::cout << "File " << filename << " doesn't have stripe statistics"
<< std::endl;
} else {
for (unsigned int j = 0; j < reader->getNumberOfStripeStatistics(); j++) {
stripeStats = reader->getStripeStatistics(j);
std::cout << "*** Stripe " << j << " ***" << std::endl << std::endl;
for(unsigned int k = 0; k < stripeStats->getNumberOfColumns(); ++k) {
std::cout << "--- Column " << k << " ---" << std::endl;
std::cout << stripeStats->getColumnStatistics(k)->toString()
<< std::endl;
if (withIndex) {
for(unsigned int r = 0; r < stripeStats->getNumberOfRowIndexStats(k); ++r) {
std::cout << "--- RowIndex " << r << " ---" << std::endl;
std::cout << stripeStats->getRowIndexStatistics(k, r)->toString()
<< std::endl;
}
}
}
}
}
}
int main(int argc, char* argv[]) {
static struct option longOptions[] = {
{"help", no_argument, ORC_NULLPTR, 'h'},
{"withIndex", no_argument, ORC_NULLPTR, 'i'},
{ORC_NULLPTR, 0, ORC_NULLPTR, 0}
};
const char* filename = ORC_NULLPTR;
bool withIndex = false;
bool helpFlag = false;
int opt;
do {
opt = getopt_long(argc, argv, "hi", longOptions, ORC_NULLPTR);
switch (opt) {
case '?':
case 'h':
helpFlag = true;
opt = -1;
break;
case 'i':
withIndex = true;
break;
}
} while (opt != -1);
argc -= optind;
argv += optind;
if (argc < 1 || helpFlag) {
std::cerr
<< "Usage: orc-statistics [-h] [--help] [-i] [--withIndex]"
<< " <filenames>\n";
exit(1);
}
for(int i=0; i < argc; ++i) {
filename = argv[i];
try {
printStatistics(filename, withIndex);
} catch (std::exception& ex) {
std::cerr << "Caught exception: " << ex.what() << "\n";
return 1;
}
}
return 0;
}