| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.hadoop.tools; |
| |
| import org.apache.commons.cli.Option; |
| import org.apache.hadoop.conf.Configuration; |
| |
| /** |
| * Enumeration mapping configuration keys to distcp command line |
| * options. |
| */ |
| public enum DistCpOptionSwitch { |
| |
| /** |
| * Ignores any failures during copy, and continues with rest. |
| * Logs failures in a file |
| */ |
| IGNORE_FAILURES(DistCpConstants.CONF_LABEL_IGNORE_FAILURES, |
| new Option("i", false, "Ignore failures during copy")), |
| |
| /** |
| * Preserves status of file/path in the target. |
| * Default behavior with -p, is to preserve replication, |
| * block size, user, group and permission on the target file |
| * |
| * If any of the optional switches are present among rbugp, then |
| * only the corresponding file attribute is preserved |
| * |
| */ |
| PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS, |
| new Option("p", true, "preserve status (rbugp)" + |
| "(replication, block-size, user, group, permission)")), |
| |
| /** |
| * Update target location by copying only files that are missing |
| * in the target. This can be used to periodically sync two folders |
| * across source and target. Typically used with DELETE_MISSING |
| * Incompatible with ATOMIC_COMMIT |
| */ |
| SYNC_FOLDERS(DistCpConstants.CONF_LABEL_SYNC_FOLDERS, |
| new Option("update", false, "Update target, copying only missing" + |
| "files or directories")), |
| |
| /** |
| * Deletes missing files in target that are missing from source |
| * This allows the target to be in sync with the source contents |
| * Typically used in conjunction with SYNC_FOLDERS |
| * Incompatible with ATOMIC_COMMIT |
| */ |
| DELETE_MISSING(DistCpConstants.CONF_LABEL_DELETE_MISSING, |
| new Option("delete", false, "Delete from target, " + |
| "files missing in source")), |
| |
| /** |
| * Configuration file to use with hftps:// for securely copying |
| * files across clusters. Typically the configuration file contains |
| * truststore/keystore information such as location, password and type |
| */ |
| SSL_CONF(DistCpConstants.CONF_LABEL_SSL_CONF, |
| new Option("mapredSslConf", true, "Configuration for ssl config file" + |
| ", to use with hftps://")), |
| |
| /** |
| * Max number of maps to use during copy. DistCp will split work |
| * as equally as possible among these maps |
| */ |
| MAX_MAPS(DistCpConstants.CONF_LABEL_MAX_MAPS, |
| new Option("m", true, "Max number of concurrent maps to use for copy")), |
| |
| /** |
| * Source file listing can be provided to DistCp in a file. |
| * This allows DistCp to copy random list of files from source |
| * and copy them to target |
| */ |
| SOURCE_FILE_LISTING(DistCpConstants.CONF_LABEL_SOURCE_LISTING, |
| new Option("f", true, "List of files that need to be copied")), |
| |
| /** |
| * Copy all the source files and commit them atomically to the target |
| * This is typically useful in cases where there is a process |
| * polling for availability of a file/dir. This option is incompatible |
| * with SYNC_FOLDERS & DELETE_MISSING |
| */ |
| ATOMIC_COMMIT(DistCpConstants.CONF_LABEL_ATOMIC_COPY, |
| new Option("atomic", false, "Commit all changes or none")), |
| |
| /** |
| * Work path to be used only in conjunction in Atomic commit |
| */ |
| WORK_PATH(DistCpConstants.CONF_LABEL_WORK_PATH, |
| new Option("tmp", true, "Intermediate work path to be used for atomic commit")), |
| |
| /** |
| * Log path where distcp output logs are written to |
| */ |
| LOG_PATH(DistCpConstants.CONF_LABEL_LOG_PATH, |
| new Option("log", true, "Folder on DFS where distcp execution logs are saved")), |
| |
| /** |
| * Copy strategy is use. This could be dynamic or uniform size etc. |
| * DistCp would use an appropriate input format based on this. |
| */ |
| COPY_STRATEGY(DistCpConstants.CONF_LABEL_COPY_STRATEGY, |
| new Option("strategy", true, "Copy strategy to use. Default is " + |
| "dividing work based on file sizes")), |
| |
| /** |
| * Skip CRC checks between source and target, when determining what |
| * files need to be copied. |
| */ |
| SKIP_CRC(DistCpConstants.CONF_LABEL_SKIP_CRC, |
| new Option("skipcrccheck", false, "Whether to skip CRC checks between " + |
| "source and target paths.")), |
| |
| /** |
| * Overwrite target-files unconditionally. |
| */ |
| OVERWRITE(DistCpConstants.CONF_LABEL_OVERWRITE, |
| new Option("overwrite", false, "Choose to overwrite target files " + |
| "unconditionally, even if they exist.")), |
| |
| /** |
| * Should DisctpExecution be blocking |
| */ |
| BLOCKING("", |
| new Option("async", false, "Should distcp execution be blocking")), |
| |
| FILE_LIMIT("", |
| new Option("filelimit", true, "(Deprecated!) Limit number of files " + |
| "copied to <= n")), |
| |
| SIZE_LIMIT("", |
| new Option("sizelimit", true, "(Deprecated!) Limit number of files " + |
| "copied to <= n bytes")), |
| |
| /** |
| * Specify bandwidth per map in MB |
| */ |
| BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB, |
| new Option("bandwidth", true, "Specify bandwidth per map in MB")); |
| |
| private final String confLabel; |
| private final Option option; |
| |
| DistCpOptionSwitch(String confLabel, Option option) { |
| this.confLabel = confLabel; |
| this.option = option; |
| } |
| |
| /** |
| * Get Configuration label for the option |
| * @return configuration label name |
| */ |
| public String getConfigLabel() { |
| return confLabel; |
| } |
| |
| /** |
| * Get CLI Option corresponding to the distcp option |
| * @return option |
| */ |
| public Option getOption() { |
| return option; |
| } |
| |
| /** |
| * Get Switch symbol |
| * @return switch symbol char |
| */ |
| public String getSwitch() { |
| return option.getOpt(); |
| } |
| |
| @Override |
| public String toString() { |
| return super.name() + " {" + |
| "confLabel='" + confLabel + '\'' + |
| ", option=" + option + '}'; |
| } |
| |
| /** |
| * Helper function to add an option to hadoop configuration object |
| * @param conf - Configuration object to include the option |
| * @param option - Option to add |
| * @param value - Value |
| */ |
| public static void addToConf(Configuration conf, |
| DistCpOptionSwitch option, |
| String value) { |
| conf.set(option.getConfigLabel(), value); |
| } |
| |
| /** |
| * Helper function to set an option to hadoop configuration object |
| * @param conf - Configuration object to include the option |
| * @param option - Option to add |
| */ |
| public static void addToConf(Configuration conf, |
| DistCpOptionSwitch option) { |
| conf.set(option.getConfigLabel(), "true"); |
| } |
| } |