| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <getopt.h> |
| #include <iostream> |
| #include <fstream> |
| #include <vector> |
| #include <string> |
| #include <sstream> |
| |
| #include "orc/OrcFile.hh" |
| #include "orc/Exceptions.hh" |
| |
| //#include "Adaptor.hh" |
| #include "wrap/orc-proto-wrapper.hh" |
| |
| void printStripeInformation(std::ostream& out, |
| uint64_t index, |
| uint64_t columns, |
| std::unique_ptr<orc::StripeInformation> stripe, |
| bool verbose) { |
| out << " { \"stripe\": " << index |
| << ", \"rows\": " << stripe->getNumberOfRows() << ",\n"; |
| out << " \"offset\": " << stripe->getOffset() |
| << ", \"length\": " << stripe->getLength() << ",\n"; |
| out << " \"index\": " << stripe->getIndexLength() |
| << ", \"data\": " << stripe->getDataLength() |
| << ", \"footer\": " << stripe->getFooterLength(); |
| if (verbose) { |
| out << ",\n \"encodings\": [\n"; |
| for(uint64_t col=0; col < columns; ++col) { |
| if (col != 0) { |
| out << ",\n"; |
| } |
| orc::ColumnEncodingKind encoding = stripe->getColumnEncoding(col); |
| out << " { \"column\": " << col |
| << ", \"encoding\": \"" |
| << columnEncodingKindToString(encoding) << "\""; |
| if (encoding == orc::ColumnEncodingKind_DICTIONARY || |
| encoding == orc::ColumnEncodingKind_DICTIONARY_V2) { |
| out << ", \"count\": " << stripe->getDictionarySize(col); |
| } |
| out << " }"; |
| } |
| out << "\n ],\n"; |
| out << " \"streams\": [\n"; |
| for(uint64_t str = 0; str < stripe->getNumberOfStreams(); ++str) { |
| if (str != 0) { |
| out << ",\n"; |
| } |
| ORC_UNIQUE_PTR<orc::StreamInformation> stream = |
| stripe->getStreamInformation(str); |
| out << " { \"id\": " << str |
| << ", \"column\": " << stream->getColumnId() |
| << ", \"kind\": \"" << streamKindToString(stream->getKind()) |
| << "\", \"offset\": " << stream->getOffset() |
| << ", \"length\": " << stream->getLength() << " }"; |
| } |
| out << "\n ]"; |
| std::string tz = stripe->getWriterTimezone(); |
| if (tz.length() != 0) { |
| out << ",\n \"timezone\": \"" << tz << "\""; |
| } |
| } |
| out << "\n }"; |
| } |
| |
| void printRawTail(std::ostream& out, |
| const char*filename) { |
| out << "Raw file tail: " << filename << "\n"; |
| std::unique_ptr<orc::Reader> reader = |
| orc::createReader(orc::readFile(filename), orc::ReaderOptions()); |
| // Parse the file tail from the serialized one. |
| orc::proto::FileTail tail; |
| if (!tail.ParseFromString(reader->getSerializedFileTail())) { |
| throw orc::ParseError("Failed to parse the file tail from string"); |
| } |
| out << tail.DebugString(); |
| } |
| |
| void printMetadata(std::ostream & out, const char*filename, bool verbose) { |
| std::unique_ptr<orc::Reader> reader = |
| orc::createReader(orc::readFile(filename), orc::ReaderOptions()); |
| out << "{ \"name\": \"" << filename << "\",\n"; |
| uint64_t numberColumns = reader->getType().getMaximumColumnId() + 1; |
| out << " \"type\": \"" |
| << reader->getType().toString() << "\",\n"; |
| out << " \"rows\": " << reader->getNumberOfRows() << ",\n"; |
| uint64_t stripeCount = reader->getNumberOfStripes(); |
| out << " \"stripe count\": " << stripeCount << ",\n"; |
| out << " \"format\": \"" << reader->getFormatVersion().toString() |
| << "\", \"writer version\": \"" |
| << orc::writerVersionToString(reader->getWriterVersion()) |
| << "\",\n"; |
| out << " \"compression\": \"" |
| << orc::compressionKindToString(reader->getCompression()) |
| << "\","; |
| if (reader->getCompression() != orc::CompressionKind_NONE) { |
| out << " \"compression block\": " |
| << reader->getCompressionSize() << ","; |
| } |
| out << "\n \"file length\": " << reader->getFileLength() << ",\n"; |
| out << " \"content\": " << reader->getContentLength() |
| << ", \"stripe stats\": " << reader->getStripeStatisticsLength() |
| << ", \"footer\": " << reader->getFileFooterLength() |
| << ", \"postscript\": " << reader->getFilePostscriptLength() << ",\n"; |
| if (reader->getRowIndexStride()) { |
| out << " \"row index stride\": " |
| << reader->getRowIndexStride() << ",\n"; |
| } |
| out << " \"user metadata\": {"; |
| std::list<std::string> keys = reader->getMetadataKeys(); |
| uint64_t remaining = keys.size(); |
| for(std::list<std::string>::const_iterator itr = keys.begin(); |
| itr != keys.end(); ++itr) { |
| out << "\n \"" << *itr << "\": \"" |
| << reader->getMetadataValue(*itr) << "\""; |
| if (--remaining != 0) { |
| out << ","; |
| } |
| } |
| out << "\n },\n"; |
| out << " \"stripes\": [\n"; |
| for(uint64_t i=0; i < stripeCount; ++i) { |
| printStripeInformation(out, i, numberColumns, reader->getStripe(i), |
| verbose); |
| if (i == stripeCount - 1) { |
| out << "\n"; |
| } else { |
| out << ",\n"; |
| } |
| } |
| out << " ]\n"; |
| out << "}\n"; |
| } |
| |
| int main(int argc, char* argv[]) { |
| static struct option longOptions[] = { |
| {"help", no_argument, ORC_NULLPTR, 'h'}, |
| {"raw", no_argument, ORC_NULLPTR, 'r'}, |
| {"verbose", no_argument, ORC_NULLPTR, 'v'}, |
| {ORC_NULLPTR, 0, ORC_NULLPTR, 0} |
| }; |
| bool helpFlag = false; |
| bool verboseFlag = false; |
| bool rawFlag = false; |
| int opt; |
| do { |
| opt = getopt_long(argc, argv, "hrv", longOptions, ORC_NULLPTR); |
| switch (opt) { |
| case '?': |
| case 'h': |
| helpFlag = true; |
| opt = -1; |
| break; |
| case 'v': |
| verboseFlag = true; |
| break; |
| case 'r': |
| rawFlag = true; |
| break; |
| } |
| } while (opt != -1); |
| argc -= optind; |
| argv += optind; |
| |
| if (argc < 1 || helpFlag) { |
| std::cerr |
| << "Usage: orc-metadata [-h] [--help] [-r] [--raw] [-v] [--verbose]" |
| << " <filename>\n"; |
| exit(1); |
| } else { |
| for(int i=0; i < argc; ++i) { |
| try { |
| if (rawFlag) { |
| printRawTail(std::cout, argv[i]); |
| } else { |
| printMetadata(std::cout, argv[i], verboseFlag); |
| } |
| } catch (std::exception& ex) { |
| std::cerr << "Caught exception in " << argv[i] |
| << ": " << ex.what() << "\n"; |
| return 1; |
| } |
| } |
| } |
| return 0; |
| } |