blob: f30a7b17445a78eef773afccb56ecd347797d8c6 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_EXPRS_TIMEZONE_DB_H
#define IMPALA_EXPRS_TIMEZONE_DB_H
#include <cstdint>
#include <iosfwd>
#include <memory>
#include <string>
#include <unordered_map>
#include "cctz/time_zone.h"
#include "common/global-types.h"
#include "common/status.h"
#include "util/zip-util.h"
namespace impala {
/// 'TimezoneDatabase' class contains functions to load and access the IANA time-zone
/// database. The IANA time-zone database (often called tz db) contains binary data files
/// that represent the history of local time for many representative locations around the
/// globe. Further information on tz db, including implementation details, is described in
/// the code repository: https://data.iana.org/time-zones/tz-link.html
///
/// The system's default tz db is located in /usr/share/zoneinfo directory. The zoneinfo
/// directory's tree structure follows the IANA time-zone naming scheme, e.g. the
/// /usr/share/zoneinfo/America/Los_Angeles file describes the "America/Los_Angeles"
/// time-zone.
///
/// This class uses the CCTZ library's 'time_zone' class internally to load, store and
/// utilize time-zone data. Further information on CCTZ is described in the code
/// repository: https://github.com/google/cctz
///
/// Initialize() should be called on process startup to load every time-zone file into
/// memory for fast lookups. It loads files from /usr/share/zoneinfo by default.
/// Alternatively, FLAGS_hdfs_zone_info_zip can be used to specify a shared zip file that
/// contains the compiled time-zone db to use.
///
/// Initialize() also defines a hard-coded set of non-standard time-zone aliases to
/// maintain a level of backward compatibility with the previous boost-based
/// implementation. Alternatively, FLAGS_hdfs_zone_alias_conf can be used to specify a
/// shared configuration file to load the non-standard aliases from.
///
/// Once Initialize() has returned without error, FindTimezone() can be safely called
/// from multiple threads to look up time-zones by name.
class TimezoneDatabase {
public:
/// Set up the static time-zone database.
static Status Initialize() WARN_UNUSED_RESULT;
/// Return path to time-zone database.
static const string& GetPath() { return tz_db_path_; }
/// Returns name of the local time-zone or empty string if cannot find it.
static std::string LocalZoneName();
/// Looks up 'Timezone' object by name. Returns pointer to the 'Timezone' object if the
/// lookup was successful and nullptr otherwise.
static const Timezone* FindTimezone(const std::string& tz_name) {
auto it = tz_name_map_.find(tz_name);
return (it == tz_name_map_.end()) ? nullptr : it->second.get();
}
static const Timezone& GetUtcTimezone() { return UTC_TIMEZONE_; }
/// Public proxy for LoadZoneInfo. Should be only used in BE tests.
static Status LoadZoneInfoBeTestOnly(
const std::string& zone_info_dir) WARN_UNUSED_RESULT {
return LoadZoneInfo(zone_info_dir);
}
private:
// For BE tests
friend class TimezoneDbNamesTest;
friend class TimezoneDbLoadAliasTest;
friend class TimezoneDbLoadZoneInfoTest;
static const std::string ZONE_INFO_DIR;
static const std::string TIMEZONE_ALIASES;
static const Timezone UTC_TIMEZONE_;
/// Type to map time-zone names to Timezone objects.
typedef std::unordered_map<std::string, std::shared_ptr<Timezone>> TimezoneMap;
static TimezoneMap tz_name_map_;
static std::string tz_db_path_;
/// Returns 'true' if 'tz_segment' is a valid time-zone name segment. Time-zone name
/// segments can have letters, digits and '_', '-', '+' characters only. Name segments
/// must begin with an uppercase letter.
/// Some examples of valid time-zone name segments are: Los_Angeles, GMT+1,
/// East-Indiana.
static bool IsTimezoneNameSegmentValid(
const std::string& tz_segment) WARN_UNUSED_RESULT;
/// Returns 'true' if 'tz_name' is a valid time-zone name. Time-zone names must be valid
/// time-zone name segments delimited by '/', e.g.: America/Argentina/San_Juan.
static bool IsTimezoneNameValid(const std::string& tz_name) WARN_UNUSED_RESULT;
/// Parses the UTC offset in 'tz_offset' and returns 'true' if it is valid. If a valid
/// offset was found, 'offset_sec' is set to the parsed value.
static bool IsTimezoneOffsetValid(const std::string& tz_offset,
int64_t* offset_sec) WARN_UNUSED_RESULT;
/// Load 'Timezone' objects into 'tz_name_map_' from the shared 'hdfs_zone_info_zip' zip
/// archive.
static Status LoadZoneInfoFromHdfs(const std::string& hdfs_zone_info_zip,
const std::string& local_dir) WARN_UNUSED_RESULT;
/// Load 'Timezone' objects into 'tz_name_map_' from 'zone_info_dir' path.
static Status LoadZoneInfo(const std::string& zone_info_dir) WARN_UNUSED_RESULT;
/// Recursive function to load 'Timezone' objects into 'tz_path_map' from 'path'.
/// 'zone_info_dir' is the root directory of the time-zone db.
static Status LoadZoneInfoHelper(const std::string& path,
const std::string& zone_info_dir, TimezoneMap& tz_path_map) WARN_UNUSED_RESULT;
/// Load 'Timezone' object from file 'path'. 'zone_info_dir' is the root directory of
/// the time-zone db.
/// - If 'path' is not a symbolic link, load 'Timezone' object from 'path' and add it to
/// 'tz_path_map' as a value mapped to 'path'.
/// - If 'path' is a symbolic link to another time-zone file, load 'Timezone' object
/// from the linked path and add it to 'tz_path_map' as a value mapped both to 'path'
/// and the linked path.
static void LoadTimezone(const std::string& path, const std::string& zone_info_dir,
TimezoneMap& tz_path_map);
/// Load 'Timezone' object from file 'path'. If successful, return 'shared_ptr' to the
/// 'Timezone' object and nullptr otherwise.
static std::shared_ptr<Timezone> LoadTimezoneHelper(
const std::string& path) WARN_UNUSED_RESULT;
/// Load custom time-zone aliases from 'hdfs_zone_alias_conf' shared file and add them
/// to 'tz_name_map_'.
static Status LoadZoneAliasesFromHdfs(
const string& hdfs_zone_alias_conf) WARN_UNUSED_RESULT;
/// Load custom time-zone aliases from 'is' and add them to 'tz_name_map_'.
static Status LoadZoneAliases(
std::istream &is, const char* path = nullptr) WARN_UNUSED_RESULT;
};
} // namespace impala
#endif