blob: 30b43be0bfe85dd7db265ddbd3ea6c3e68fdfcf2 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_RUNTIME_HDFS_FS_CACHE_H
#define IMPALA_RUNTIME_HDFS_FS_CACHE_H
#include <string>
#include <boost/scoped_ptr.hpp>
#include <boost/unordered_map.hpp>
#include <boost/thread/mutex.hpp>
#include "common/hdfs.h"
#include "common/status.h"
namespace impala {
/// A (process-wide) cache of hdfsFS objects.
/// These connections are shared across all threads and kept open until the process
/// terminates.
///
/// These connections are leaked, i.e. we never call hdfsDisconnect(). Calls to
/// hdfsDisconnect() by individual threads would terminate all other connections handed
/// out via hdfsConnect() to the same URI, and there is no simple, safe way to call
/// hdfsDisconnect() when the process terminates (the proper solution is likely to create
/// a signal handler to detect when the process is killed, but we would still leak when
/// impalad crashes).
class HdfsFsCache {
 public:
  /// Map from a string key to an hdfsFS handle. Used as the type of the process-wide
  /// map (fs_map_) and of the optional caller-owned cache accepted by GetConnection().
  /// NOTE(review): the key is presumably the NameNode derived from the path; confirm
  /// against the implementation.
  typedef boost::unordered_map<std::string, hdfsFS> HdfsFsMap;

  /// Returns the raw pointer held by the singleton holder. The holder is populated in
  /// Init(), so this is NULL until then.
  static HdfsFsCache* instance() { return HdfsFsCache::instance_.get(); }

  /// Initializes the cache. Must be called before any other APIs.
  static Status Init();

  /// Get connection to the local filesystem. The connection is returned through 'fs'.
  Status GetLocalConnection(hdfsFS* fs);

  /// Get connection to specific fs by specifying a path. The connection is returned
  /// through 'fs'. Optionally, a local cache can be provided so that the process-wide
  /// lock can be avoided on subsequent calls for the same filesystem. The caller is
  /// responsible for synchronizing the local cache (e.g. by passing a thread-local
  /// cache).
  Status GetConnection(const std::string& path, hdfsFS* fs,
      HdfsFsMap* local_cache = NULL);

  /// Get NameNode info from path, set error message in 'err' if path is not valid.
  /// Exposed as a static method for testing purpose.
  /// The types are spelled std::string, like the rest of this header, so the
  /// declaration does not depend on a using-declaration leaking in from an include.
  static std::string GetNameNodeFromPath(const std::string& path, std::string* err);

  /// S3A access key retrieved by running command in Init().
  /// If either s3a_secret_key_ or this is empty, the default value is taken from the
  /// local Hadoop client configuration.
  static std::string s3a_access_key_;

  /// S3A secret key retrieved by running command in Init().
  /// If either s3a_access_key_ or this is empty, the default value is taken from the
  /// local Hadoop client configuration.
  static std::string s3a_secret_key_;

 private:
  /// Singleton instance. Instantiated in Init().
  static boost::scoped_ptr<HdfsFsCache> instance_;

  /// Protects fs_map_.
  boost::mutex lock_;

  /// Process-wide map of cached connections. Guarded by lock_.
  HdfsFsMap fs_map_;

  /// Private so that the class cannot be constructed from outside.
  HdfsFsCache() { }

  /// Copying is disabled: both members are private and have no definition in this
  /// header.
  HdfsFsCache(HdfsFsCache const& l); // disable copy ctor
  HdfsFsCache& operator=(HdfsFsCache const& l); // disable assignment
};
}
#endif