// blob: 348f851ad3835de982a6878a6360299738e6a87b [file] [log] [blame]
/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/
#include <google/protobuf/stubs/common.h>
#include <unistd.h>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <future>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "tools_common.h"
/**
 * Prints the hdfs_find help text (synopsis, options and examples)
 * to standard output, one line at a time.
 */
void usage(){
  //Every entry is one output line; an empty entry yields a blank line.
  static const char *const kHelpLines[] = {
    "Usage: hdfs_find [OPTION] PATH",
    "",
    "Finds all files recursively starting from the",
    "specified PATH and prints their file paths.",
    "This hdfs_find tool mimics the POSIX find.",
    "",
    "Both PATH and NAME can have wild-cards.",
    "",
    " -n NAME if provided all results will be matching the NAME pattern",
    " otherwise, the implicit '*' will be used",
    " NAME allows wild-cards",
    "",
    " -m MAX_DEPTH if provided the maximum depth to recurse after the end of",
    " the path is reached will be limited by MAX_DEPTH",
    " otherwise, the maximum depth to recurse is unbound",
    " MAX_DEPTH can be set to 0 for pure globbing and ignoring",
    " the NAME option (no recursion after the end of the path)",
    "",
    " -h display this help and exit",
    "",
    "Examples:",
    "hdfs_find hdfs://localhost.localdomain:8020/dir?/tree* -n some?file*name",
    "hdfs_find / -n file_name -m 3"
  };

  for (const char *help_line : kHelpLines) {
    std::cout << help_line << std::endl;
  }
}
/**
 * Parses the value given to the -m option into a recursion depth.
 *
 * Leading whitespace and trailing non-digit characters are tolerated,
 * matching what the previous std::stoi based parsing accepted.
 *
 * @param text   NUL-terminated option argument
 * @param depth  receives the parsed value; left untouched on failure
 * @return true if text starts with a decimal number in [0, UINT32_MAX],
 *         false if it is not a number, is negative or is too large
 */
static bool parse_max_depth(const char *text, uint32_t &depth) {
  try {
    const long long parsed = std::stoll(text);
    if (parsed < 0 || parsed > static_cast<long long>(UINT32_MAX)) {
      return false;
    }
    depth = static_cast<uint32_t>(parsed);
    return true;
  } catch (const std::exception &) {
    //std::stoll reports non-numeric and out-of-range input by throwing
    return false;
  }
}

/**
 * Entry point of hdfs_find.
 *
 * Reads the -h, -n NAME and -m MAX_DEPTH options followed by PATH, connects
 * to the file system named by PATH, runs FileSystem::Find and prints every
 * returned entry to standard output as result chunks arrive.
 *
 * @return EXIT_SUCCESS if Find completed without reporting an error,
 *         EXIT_FAILURE on a usage error, a connection failure or a Find error
 */
int main(int argc, char *argv[]) {
  //We should have at least 2 arguments
  if (argc < 2) {
    usage();
    exit(EXIT_FAILURE);
  }

  int input;

  //If NAME is not specified we use implicit "*"
  std::string name = "*";
  //If MAX_DEPTH is not specified we use the default provided by FileSystem
  uint32_t max_depth = hdfs::FileSystem::GetDefaultFindMaxDepth();

  //Using GetOpt to read in the values
  opterr = 0;
  while ((input = getopt(argc, argv, "hn:m:")) != -1) {
    switch (input)
    {
    case 'h':
      usage();
      exit(EXIT_SUCCESS);
    case 'n':
      name = optarg;
      break;
    case 'm':
      //Reject garbage and negative depths instead of crashing or wrapping around
      if (!parse_max_depth(optarg, max_depth)) {
        std::cerr << "Invalid MAX_DEPTH value: " << optarg << std::endl;
        usage();
        exit(EXIT_FAILURE);
      }
      break;
    case '?':
      if (optopt == 'n' || optopt == 'm')
        std::cerr << "Option -" << static_cast<char>(optopt) << " requires an argument." << std::endl;
      else if (isprint(optopt))
        std::cerr << "Unknown option `-" << static_cast<char>(optopt) << "'." << std::endl;
      else
        std::cerr << "Unknown option character `" << static_cast<char>(optopt) << "'." << std::endl;
      usage();
      exit(EXIT_FAILURE);
    default:
      exit(EXIT_FAILURE);
    }
  }

  //Options may have consumed every argument (e.g. "hdfs_find -n foo");
  //argv[optind] would then be a null pointer, so PATH must be checked first.
  if (optind >= argc) {
    usage();
    exit(EXIT_FAILURE);
  }
  std::string uri_path = argv[optind];

  //Building a URI object from the given uri_path
  hdfs::URI uri = hdfs::parse_path_or_exit(uri_path);

  std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, true);
  if (!fs) {
    std::cerr << "Could not connect the file system. " << std::endl;
    exit(EXIT_FAILURE);
  }

  std::shared_ptr<std::promise<void>> promise = std::make_shared<std::promise<void>>();
  std::future<void> future(promise->get_future());
  hdfs::Status status = hdfs::Status::OK();

  /**
   * Keep requesting more until we get the entire listing. Set the promise
   * when we have the entire listing to stop.
   *
   * Find guarantees that the handler will only be called once at a time,
   * so we do not need any locking here. It also guarantees that the handler will be
   * only called once with has_more_results set to false.
   *
   * 'status' is captured by reference; that is safe because main blocks on
   * the future below and therefore outlives every handler invocation.
   */
  auto handler = [promise, &status]
                  (const hdfs::Status &s, const std::vector<hdfs::StatInfo> & si, bool has_more_results) -> bool {
    //Print result chunks as they arrive
    for (hdfs::StatInfo const& entry : si) {
      std::cout << entry.str() << std::endl;
    }
    if (!s.ok() && status.ok()) {
      //We make sure we set 'status' only on the first error.
      status = s;
    }
    if (!has_more_results) {
      promise->set_value();  //set promise
      return false;          //request stop sending results
    }
    return true;  //request more results
  };

  //Asynchronous call to Find
  fs->Find(uri.get_path(), name, max_depth, handler);

  //block until promise is set
  future.get();

  //Report a failed search through the exit code as well as on stderr
  int exit_code = EXIT_SUCCESS;
  if (!status.ok()) {
    std::cerr << "Error: " << status.ToString() << std::endl;
    exit_code = EXIT_FAILURE;
  }

  // Clean up static data and prevent valgrind memory leaks
  google::protobuf::ShutdownProtobufLibrary();
  return exit_code;
}