blob: 398426a827a3b6957cc4ffebacecb462468b59ae [file] [log] [blame]
#!/usr/bin/env /usr/lib/bigtop-groovy/bin/groovy
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
import groovy.json.JsonSlurper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.permission.FsPermission;
// Script-wide logger, named after this script's own class.
def final LOG = LogFactory.getLog(this.getClass());
// JSON parser used further down to read the schema file named by the first argument.
def final jsonParser = new JsonSlurper();
def final USAGE = """\
This script provisions the skeleton of a hadoop file system.
It takes a single argument: The json schema (a list of lists),
of 4 element tuples. For an example , see the bigtop init-hcfs.json
file. The main elements of the JSON file are:
A copy of init-hcfs.json ships with bigtop distributions.
dir: list of dirs to create with permissions.
user: list of users to setup home dirs with permissions.
root_user: The root owner of distributed FS, to run shell commands.
To run this script, you will want to setup your environment using
which defines the properties above, and then invoke this script.
Details below.
As mentinoed above, the init-hcfs.json file is what guides the
directories/users to setup.
So first you will want to edit that file as you need to. Some common
- Usually the "root_user" on HDFS is just hdfs. For other file systems
the root user might be "root".
- The default hadoop users you may find in the init-hcfs.json template
you follow "tom"/"alice"/etc.. aren't necessarily on all clusters.
1) Simple groovy based method: Just manually construct a hadoop classpath:
groovy -classpath /usr/lib/hadoop/hadoop-common-2.0.6-alpha.jar
:/usr/lib/hadoop/lib/protobuf-java-2.4.0a.jar /vagrant/init-hcfs.groovy
2) Another method: Follow the instructions on
for setting up groovy runtime environment with
CLASSPATH and/or append those libraries to the shebang command as
necessary, and then simply do:
chmod +x init-hcfs.groovy
./init-hcfs.groovy init-hcfs.json
* The HCFS generic provisioning process:
* 1) Create a file system skeleton.
* 2) Create users with home dirs in /user.
* In the future maybe we will add more optional steps (i.e. adding libs to
* the distributed cache, mounting FUSE over HDFS, etc...).
// Startup checks, keyed by the error message to report. Each value is a
// closure whose return value says whether that check passed.
def errors = [
// Check 0: exactly one command-line argument is expected.
("0: No init-hcfs.json input file provided !"): {"Checking argument length: " + args.length + " " + args);
return args.length == 1
// Check 1: that argument must name a file that exists on the local file system.
("1: init-hcfs json not found."): {"Checking for file : " + args[0]);
return new File(args[0]).exists()
// Walk the checks; each entry yields its message and its check closure.
// NOTE(review): confirm the `if` below invokes `passed` and reports only when it returns false.
errors.each { error_message, passed ->
if (! {
System.err.println("ERROR:" + error_message);
// The single argument is the path of the JSON schema file.
def final json = args[0];
// Parse the whole schema up front; its fields are read by property name below.
// NOTE(review): the FileReader is not explicitly closed in the code shown here.
def final parsedData = jsonParser.parse(new FileReader(json));
* Groovy is smart enough to convert JSON
* fields to objects for us automagically.
* */
// "dir" field: the directory entries to create.
def dirs = parsedData.dir as List;
// "user" field: the user entries whose home directories get set up.
def users = parsedData.user as List;
// "root_user" field: substituted for the HCFS_SUPER_USER placeholder in directory entries.
def hcfs_super_user = parsedData.root_user;
// Hadoop Configuration created with its no-argument constructor.
def final Configuration conf = new Configuration();"Provisioning file system for file system from Configuration: " +
* We create a single FileSystem instance to use for all the file system calls.
* This script makes anywhere from 20-100 file system operations so it's
* important to cache and create this only once.
* */
// Obtain the FileSystem instance for the configuration in conf.
def final FileSystem fs = FileSystem.get(conf);"PROVISIONING WITH FILE SYSTEM : " + fs.getClass());
// Longest back-off time (in seconds, going by the retry log messages) to wait
// while checking whether the file system is ready for writes. The retry loops
// in mkdir and copyJars double their counter from 1 up to this value.
def final int maxBackOff = 64;
* Make a directory. Note when providing input to this function that if
* nulls are given, the commands will work but behaviour varies depending on
* the HCFS implementation ACLs, etc.
* @param fs The HCFS implementation to create the Directory on.
* @param dname Required.
* @param mode can be null.
* @param user can be null.
* @param group can be null,
// Creates dname on fsys using a retry loop with a doubling counter, then
// applies owner/group and permission when they are non-null.
def mkdir = { FileSystem fsys, Path dname, FsPermission mode, String user, String group ->
boolean success = false;
// i doubles on every pass: 1, 2, 4, ... up to maxBackOff.
// NOTE(review): i has no def/type here, so Groovy will not treat it as a
// local variable of this closure — confirm that is intended.
for(i = 1; i <= maxBackOff; i*=2) {
try {
// mkdirs reports the outcome through its boolean return value.
success = fsys.mkdirs(dname)
} catch(Exception e) {"Failed to create directory " + dname + "... Retry after " + i + " second(s)");
// Reached with success still false: report that the directory could not be created.
if (!success) {"Can not create directory " + dname + " on " + fsys.getClass());
// Ownership is only changed when a user is supplied; group is passed through as given.
if (user != null) {
fsys.setOwner(dname, user, group);
// Permissions are only changed when a mode is supplied.
if (mode != null) {
fsys.setPermission(dname, mode);
// Read the permission back so it can be included in the failure message.
FsPermission result = fsys.getFileStatus(dname).getPermission();
/** Confirm that the permission was actually applied.
* This check matters while the documentation for modifying and
* maintaining this approach to HCFS provisioning is still being
* improved. */
// NOTE(review): the file status is fetched a second time here instead of reusing result.
if (!fsys.getFileStatus(dname).getPermission().equals(mode)) {
throw new RuntimeException("Failed at setting permission to " + mode +
"... target directory permission is incorrect: " + result);
* Create a perm from raw string representing an octal perm.
* @param mode The stringified octal mode (i.e. "1777")
* */
// Converts an octal permission string (e.g. "1777") into an FsPermission.
private FsPermission readPerm(String mode) {
// The leading "0" makes Short.decode interpret the digits as an octal number.
Short permValue = Short.decode("0" + mode);
//This constructor will decode the octal perm bits
//out of the short.
return new FsPermission(permValue);
// Counter reported in the summary message after the directory loop.
// NOTE(review): no increment of dirs_created is visible in the loop below —
// confirm it is updated before the total is reported.
int dirs_created = 0;
* Provisioning the directories on the file system. This is the
* most important task of this script, as a basic directory skeleton
* is needed even for basic yarn/mapreduce apps before startup.
* */
dirs.each() {
// Each entry is unpacked positionally as (path, octal mode string, owner, group).
def (dname, mode, user, group) = it;
// Replace the path string with a Hadoop Path.
dname = new Path(dname);
//We encode permissions as strings, since they are octal.
//JSON doesn't support octal natively.
if (mode != null)
mode = readPerm(mode) as FsPermission;
// "HCFS_SUPER_USER" is a placeholder replaced by the schema's root_user value;
// the safe-navigation call skips the comparison when user is null.
if (user?.equals("HCFS_SUPER_USER"))
user = hcfs_super_user;"mkdirs " + dname + " " + user + " " + mode + " " + group);
mkdir(fs, dname, mode, user, group);
}"Succesfully created " + dirs_created + " directories in the DFS.");
* Now, for most clusters we will generally start out with at least one
* user. You should modify your init-hcfs.json file accordingly if you
* have a set of users you want to setup for using hadoop.
* For each user we do initial setup, create a home directory, etc...
* You may also need to do special tasks if running LinuxTaskControllers,
* etc, which aren't (yet) handled by this provisioner.
* */
// Set owner, group and permission on the home directory of every listed user.
users.each() {
// Each entry is unpacked positionally as (user name, octal permission string, group).
def (user, permission, group) = it;"current user: " + user);
// Home directories live directly under /user.
Path homedir = new Path("/user/" + user);
//perms should be ALL, RX,RX ^^
fs.setOwner(homedir, user, group);
// Convert the octal string with readPerm and apply it to the home directory.
FsPermission perm = readPerm(permission);
fs.setPermission(homedir, perm);
* Copies jar files from a local source directory into the distributed FS.
* Directories and broken symlinks will be skipped.
* @param fs An instance of an HCFS FileSystem .
* @param input The LOCAL DIRECTORY containing jar files.
* @param jarstr A jar file name filter used to reject/accept jar names.
* See the script below for example of how it's used. Jars matching this
* string will be copied into the specified path on the "target" directory.
* @param target The path on the DISTRIBUTED FS where jars should be copied
* to.
* @return The total number of jars copied into the DFS.
// Uploads the jar files found directly in the local directory `input` to
// `target` on fsys, and returns the `copied` counter.
def copyJars = { FileSystem fsys, File input, String jarstr, Path target ->
// Value handed back to the caller as the number of jars copied.
int copied = 0;
// Keep only regular files whose name ends with "jar" and contains jarstr.
// NOTE(review): endsWith("jar") has no dot, so a name such as "notajar" also matches.
input.listFiles(new FileFilter() {
public boolean accept(File f) {
String filename = f.getName();
boolean validJar = filename.endsWith("jar") && f.isFile();
return validJar && filename.contains(jarstr)
}).each({ jar_file ->
boolean success = false;
// Same doubling counter as in mkdir: 1, 2, 4, ... up to maxBackOff.
for(i = 1; i <= maxBackOff; i*=2) {
try {
// Upload the local jar; success is set only when the copy did not throw.
fsys.copyFromLocalFile(new Path(jar_file.getAbsolutePath()), target)
success = true;
} catch(Exception e) {"Failed to upload " + jar_file.getAbsolutePath() + " to " + target + "... Retry after " + i + " second(s)");
// Reached with success still false: report the jar that could not be uploaded.
if (!success) {"Can not upload " + jar_file.getAbsolutePath() + " to " + target + " on " + fsys.getClass());
return copied;
// Running total of the counts returned by the copyJars calls below.
total_jars = 0;"Now copying Jars into the DFS for tez ");"This might take a few seconds...");
// Destination root on the distributed FS, and the local Tez install directory.
def final TEZ_APPS = "/apps";
def final TEZ_HOME = "/usr/lib/tez/";
// The empty filter string is contained in every file name, so every jar in
// TEZ_HOME/lib is sent to /apps/tez/lib.
total_jars += copyJars(fs,
new File(TEZ_HOME, "lib/"), "",
new Path(TEZ_APPS, "tez/lib"))
// Likewise, every jar directly inside TEZ_HOME is sent to /apps/tez.
total_jars += copyJars(fs,
new File(TEZ_HOME), "",
new Path(TEZ_APPS, "tez"))"Total jars copied into the DFS : " + total_jars);