| .\" Licensed to the Apache Software Foundation (ASF) under one or more |
| .\" contributor license agreements. See the NOTICE file distributed with |
| .\" this work for additional information regarding copyright ownership. |
| .\" The ASF licenses this file to You under the Apache License, Version 2.0 |
| .\" (the "License"); you may not use this file except in compliance with |
| .\" the License. You may obtain a copy of the License at |
| .\" |
| .\" http://www.apache.org/licenses/LICENSE-2.0 |
| .\" |
| .\" Unless required by applicable law or agreed to in writing, software |
| .\" distributed under the License is distributed on an "AS IS" BASIS, |
| .\" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| .\" See the License for the specific language governing permissions and |
| .\" limitations under the License. |
| .\" |
| .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 |
| .\" |
| .\" Standard preamble: |
| .\" ======================================================================== |
| .de Sh \" Subsection heading |
| .br |
| .if t .Sp |
| .ne 5 |
| .PP |
| \fB\\$1\fR |
| .PP |
| .. |
| .de Sp \" Vertical space (when we can't use .PP) |
| .if t .sp .5v |
| .if n .sp |
| .. |
| .de Vb \" Begin verbatim text |
| .ft CW |
| .nf |
| .ne \\$1 |
| .. |
| .de Ve \" End verbatim text |
| .ft R |
| .fi |
| .. |
| .\" Set up some character translations and predefined strings. \*(-- will |
| .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left |
| .\" double quote, and \*(R" will give a right double quote. | will give a |
| .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to |
| .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' |
| .\" expand to `' in nroff, nothing in troff, for use with C<>. |
| .tr \(*W-|\(bv\*(Tr |
| .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' |
| .ie n \{\ |
| . ds -- \(*W- |
| . ds PI pi |
| . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch |
| . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch |
| . ds L" "" |
| . ds R" "" |
| . ds C` "" |
| . ds C' "" |
| 'br\} |
| .el\{\ |
| . ds -- \|\(em\| |
| . ds PI \(*p |
| . ds L" `` |
| . ds R" '' |
| 'br\} |
| .\" |
| .\" If the F register is turned on, we'll generate index entries on stderr for |
| .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index |
| .\" entries marked with X<> in POD. Of course, you'll have to process the |
| .\" output yourself in some meaningful fashion. |
| .if \nF \{\ |
| . de IX |
| . tm Index:\\$1\t\\n%\t"\\$2" |
| .. |
| . nr % 0 |
| . rr F |
| .\} |
| .\" |
| .\" For nroff, turn off justification. Always turn off hyphenation; it makes |
| .\" way too many mistakes in technical documents. |
| .hy 0 |
| .if n .na |
| .\" |
| .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). |
| .\" Fear. Run. Save yourself. No user-serviceable parts. |
| . \" fudge factors for nroff and troff |
| .if n \{\ |
| . ds #H 0 |
| . ds #V .8m |
| . ds #F .3m |
| . ds #[ \f1 |
| . ds #] \fP |
| .\} |
| .if t \{\ |
| . ds #H ((1u-(\\\\n(.fu%2u))*.13m) |
| . ds #V .6m |
| . ds #F 0 |
| . ds #[ \& |
| . ds #] \& |
| .\} |
| . \" simple accents for nroff and troff |
| .if n \{\ |
| . ds ' \& |
| . ds ` \& |
| . ds ^ \& |
| . ds , \& |
| . ds ~ ~ |
| . ds / |
| .\} |
| .if t \{\ |
| . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" |
| . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' |
| . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' |
| . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' |
| . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' |
| . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' |
| .\} |
| . \" troff and (daisy-wheel) nroff accents |
| .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' |
| .ds 8 \h'\*(#H'\(*b\h'-\*(#H' |
| .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] |
| .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' |
| .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' |
| .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] |
| .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] |
| .ds ae a\h'-(\w'a'u*4/10)'e |
| .ds Ae A\h'-(\w'A'u*4/10)'E |
| . \" corrections for vroff |
| .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' |
| .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' |
| . \" for low resolution devices (crt and lpr) |
| .if \n(.H>23 .if \n(.V>19 \ |
| \{\ |
| . ds : e |
| . ds 8 ss |
| . ds o a |
| . ds d- d\h'-1'\(ga |
| . ds D- D\h'-1'\(hy |
| . ds th \o'bp' |
| . ds Th \o'LP' |
| . ds ae ae |
| . ds Ae AE |
| .\} |
| .rm #[ #] #H #V #F C |
| .\" ======================================================================== |
| .\" |
| .IX Title "hadoop 1" |
| .TH hadoop 1 "2009-02-23" "hadoop" "Hadoop" |
| .SH "NAME" |
| .Vb 6 |
| \& __ __ __ |
| \& / / / /___ _____/ /___ ____ ____ |
| \& / /_/ / __ `/ __ / __ \e/ __ \e/ __ \e |
| \& / __ / /_/ / /_/ / /_/ / /_/ / /_/ / |
| \& /_/ /_/\e__,_/\e__,_/\e____/\e____/ .___/ |
| \& /_/ |
| .Ve |
| .PP |
| Hadoop \- Hadoop is a software platform that lets one easily write and run applications that process vast amounts of data. |
| .SH "SYNOPSIS" |
| .IX Header "SYNOPSIS" |
| .PP |
| .B hadoop |
| .RB [\-\-config\ confdir] |
| .I COMMAND |
| .PP |
| .B hdfs |
| .RB [\-\-config\ confdir] |
| .I COMMAND |
| .PP |
| .B yarn |
| .RB [\-\-config\ confdir] |
| .I COMMAND |
| .PP |
| .B mapred |
| .RB [\-\-config\ confdir] |
| .I COMMAND |
| .SH "DESCRIPTION" |
| .IX Header "DESCRIPTION" |
| Here's what makes Hadoop especially useful: |
| .IP "Scalable" 4 |
| .IX Item "Scalable" |
| Hadoop can reliably store and process petabytes. |
| .IP "Economical" 4 |
| .IX Item "Economical" |
| It distributes the data and processing across clusters of commonly available computers. These clusters can number into the thousands of nodes. |
| .IP "Efficient" 4 |
| .IX Item "Efficient" |
| By distributing the data, Hadoop can process it in parallel on the nodes where the data is located. This makes it extremely rapid. |
| .IP "Reliable" 4 |
| .IX Item "Reliable" |
| Hadoop automatically maintains multiple copies of data and automatically redeploys computing tasks based on failures. |
| .PP |
| Hadoop implements MapReduce, using the Hadoop Distributed File System (\s-1HDFS\s0). MapReduce divides applications into many small blocks of work. \s-1HDFS\s0 creates multiple replicas of data blocks for reliability, placing them on compute nodes around the cluster. MapReduce can then process the data where it is located. |
| .PP |
| For more details about hadoop, see the Hadoop Wiki at http://wiki.apache.org/hadoop/. |
| .SH "OPTIONS" |
| .IX Header "OPTIONS" |
| .IP "\-\-config configdir" 4 |
| .IX Item "--config configdir" |
| Overrides the \f(CW\*(C`HADOOP_CONF_DIR\*(C'\fR environment variable. See \f(CW\*(C`ENVIRONMENT\*(C'\fR section below. |
| .SH "COMMANDS" |
| .IX Header "COMMANDS" |
| .PP |
| Run each tool (hadoop, hdfs, yarn, mapred) without arguments to access the built-in tool documentation. |
| .SH "FILES" |
| .IX Header "FILES" |
| .IP "/etc/hadoop/conf" 4 |
| .IX Item "/etc/hadoop/conf" |
| This symbolic link points to the currently active Hadoop configuration directory. |
| .RS 4 |
| .IP "\fBNote to Hadoop System Admins\fR" 8 |
| .IX Item "Note to Hadoop System Admins" |
| The \f(CW\*(C`/etc/hadoop/conf\*(C'\fR link is managed by the \fIalternatives\fR\|(8) command so you should \fBnot\fR change this |
| symlink directly. |
| .Sp |
| To see what current \fIalternatives\fR\|(8) Hadoop configurations you have, run the following command: |
| .Sp |
| .Vb 6 |
| \& # alternatives --display hadoop-conf |
| \& hadoop-conf - status is auto. |
| \& link currently points to /etc/hadoop/conf.pseudo |
| \& /etc/hadoop/conf.pseudo - priority 10 |
| \& Current `best' version is /etc/hadoop/conf.pseudo. |
| .Ve |
| .Sp |
| This shows that the link points to \f(CW\*(C`/etc/hadoop/conf.pseudo\*(C'\fR (for the Hadoop Pseudo-Distributed configuration). |
| .Sp |
| To add a new custom configuration, run the following commands as root: |
| .Sp |
| .Vb 1 |
| \& # cp -r /etc/hadoop/conf.empty /etc/hadoop/conf.my |
| .Ve |
| .Sp |
| This will create a new configuration directory, \f(CW\*(C`/etc/hadoop/conf.my\*(C'\fR, that serves as a |
| starting point for a new configuration. Edit the configuration files in \f(CW\*(C`/etc/hadoop/conf.my\*(C'\fR |
| until you have the configuration you want. |
| .Sp |
| To activate your new configuration and see the new configuration list: |
| .Sp |
| .Vb 1 |
| \& # alternatives --install /etc/hadoop/conf hadoop-conf /etc/hadoop/conf.my 90 |
| .Ve |
| .Sp |
| You can verify your new configuration is active by running the following: |
| .Sp |
| .Vb 7 |
| \& # alternatives --display hadoop-conf |
| \& hadoop-conf - status is auto. |
| \& link currently points to /etc/hadoop/conf.my |
| \& /etc/hadoop/conf.empty - priority 10 |
| \& /etc/hadoop/conf.pseudo - priority 30 |
| \& /etc/hadoop/conf.my - priority 90 |
| \& Current `best' version is /etc/hadoop/conf.my. |
| .Ve |
| .Sp |
| At this point, it might be a good idea to restart your services with the new configuration, e.g., |
| .Sp |
| .Vb 1 |
| \& # /etc/init.d/hadoop-hdfs-namenode restart |
| .Ve |
| .RE |
| .RS 4 |
| .RE |
| .IP "/usr/bin/hadoop\-config.sh" 4 |
| .IX Item "/usr/bin/hadoop-config.sh" |
| This script sets up environment variables that Hadoop components need at startup (see \f(CW\*(C`ENVIRONMENT\*(C'\fR section). |
| .IP "/etc/init.d/hadoop\-hdfs\-namenode" 4 |
| .IX Item "/etc/init.d/hadoop-hdfs-namenode" |
| Service script for starting and stopping the Hadoop NameNode |
| .IP "/etc/init.d/hadoop\-hdfs\-datanode" 4 |
| .IX Item "/etc/init.d/hadoop-hdfs-datanode" |
| Service script for starting and stopping the Hadoop DataNode |
| .IP "/etc/init.d/hadoop\-hdfs\-secondarynamenode" 4 |
| .IX Item "/etc/init.d/hadoop-hdfs-secondarynamenode" |
| Service script for starting and stopping the Hadoop Secondary NameNode |
| .IP "/etc/init.d/hadoop\-hdfs\-zkfc" 4 |
| .IX Item "/etc/init.d/hadoop-hdfs-zkfc" |
| Service script for starting and stopping the Hadoop HDFS failover controller |
| .IP "/etc/init.d/hadoop\-hdfs\-journalnode" 4 |
| .IX Item "/etc/init.d/hadoop-hdfs-journalnode" |
| Service script for starting and stopping the Hadoop HDFS JournalNode |
| .IP "/etc/init.d/hadoop\-yarn\-resourcemanager" 4 |
| .IX Item "/etc/init.d/hadoop-yarn-resourcemanager" |
| Service script for starting and stopping the Hadoop YARN Resource Manager |
| .IP "/etc/init.d/hadoop\-yarn\-nodemanager" 4 |
| .IX Item "/etc/init.d/hadoop-yarn-nodemanager" |
| Service script for starting and stopping the Hadoop YARN Node Manager |
| .IP "/etc/init.d/hadoop\-yarn\-proxyserver" 4 |
| .IX Item "/etc/init.d/hadoop-yarn-proxyserver" |
| Service script for starting and stopping the Hadoop YARN Web Proxy |
| .IP "/etc/init.d/hadoop\-mapreduce\-historyserver" 4 |
| .IX Item "/etc/init.d/hadoop-mapreduce-historyserver" |
| Service script for starting and stopping the Hadoop MapReduce Historyserver |
| .SH "ENVIRONMENT" |
| .IX Header "ENVIRONMENT" |
| .IP "\s-1JAVA_HOME\s0" 4 |
| .IX Item "JAVA_HOME" |
| Hadoop will honor the location of your \f(CW\*(C`JAVA_HOME\*(C'\fR environment variable. Hadoop requires Sun Java 1.6 |
| which can be downloaded from http://java.sun.com. |
| .IP "\s-1HADOOP_CONF_DIR\s0" 4 |
| .IX Item "HADOOP_CONF_DIR" |
| The location of the Hadoop configuration files. Defaults to \f(CW\*(C`/etc/hadoop/conf\*(C'\fR. For more details, |
| see the \f(CW\*(C`FILES\*(C'\fR section. |
| .IP "\s-1HADOOP_MAPRED_HOME\s0" 4 |
| .IX Item "HADOOP_MAPRED_HOME" |
| The Hadoop MapReduce implementation jar files are by default in \f(CW\*(C`/usr/lib/hadoop-mapreduce\*(C'\fR. You can change the location with this environment variable. |
| .IP "\s-1HADOOP_COMMON_HOME\s0" 4 |
| .IX Item "HADOOP_COMMON_HOME" |
| The Hadoop common jar files are by default in \f(CW\*(C`/usr/lib/hadoop\*(C'\fR. You can change the location |
| with this environment variable (not recommended). |
| .IP "\s-1HADOOP_HDFS_HOME\s0" 4 |
| .IX Item "HADOOP_HDFS_HOME" |
| The Hadoop HDFS jar files are by default in \f(CW\*(C`/usr/lib/hadoop-hdfs\*(C'\fR. You can change the location |
| with this environment variable (not recommended). |
| .IP "\s-1HADOOP_YARN_HOME\s0" 4 |
| .IX Item "HADOOP_YARN_HOME" |
| The Hadoop YARN jar files are by default in \f(CW\*(C`/usr/lib/hadoop-yarn\*(C'\fR. You can change the location |
| with this environment variable (not recommended). |
| .SH "EXAMPLES" |
| .IX Header "EXAMPLES" |
| .Vb 4 |
| \& $ mkdir input |
| \& $ cp <txt files> input |
| \& $ HADOOP_CONF_DIR=/ hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar grep input output 'string' |
| \& $ cat output/* |
| .Ve |
| .SH "COPYRIGHT" |
| .IX Header "COPYRIGHT" |
| Copyright © 2008 The Apache Software Foundation. All rights reserved. |
| .SH "SEE ALSO" |
| .IX Header "SEE ALSO" |
| \&\fIjava\fR\|(1), \fIalternatives\fR\|(8) |