# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Hadoop RPM spec file
#
# FIXME: we need to disable some of the stricter checks on native files for now,
# since the Hadoop build system makes it difficult to pass the kind of flags
# that would make newer RPM debuginfo generation scripts happy.
%undefine _missing_build_ids_terminate_build
%define hadoop_name hadoop
%define etc_hadoop /etc/%{name}
%define config_hadoop %{etc_hadoop}/conf
%define lib_hadoop_dirname /usr/lib
%define lib_hadoop %{lib_hadoop_dirname}/%{name}
%define log_hadoop_dirname /var/log
%define log_hadoop %{log_hadoop_dirname}/%{name}
%define bin_hadoop %{_bindir}
%define man_hadoop %{_mandir}
%define src_hadoop /usr/src/%{name}
%define hadoop_username mapred
%define hadoop_services namenode secondarynamenode datanode jobtracker tasktracker
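# These service names drive the generation of the init.d scripts in the install
# section and correspond to the per-daemon sub-packages defined below.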
# Hadoop outputs built binaries into %{hadoop_build_path}
%define hadoop_build_path build
%define static_images_dir src/webapps/static/images
%ifarch i386
%global hadoop_arch Linux-i386-32
%endif
%ifarch amd64 x86_64
%global hadoop_arch Linux-amd64-64
%endif
# CentOS 5 does not have any dist macro,
# so assume anything that is not Mageia or SUSE is RHEL/CentOS/Fedora
%if %{!?suse_version:1}0 && %{!?mgaversion:1}0
# brp-repack-jars uses unzip to expand jar files.
# Unfortunately aspectjtools-1.6.5.jar, pulled in by ivy, contains some files and
# directories without any read permission, which makes the whole process fail.
# So for now brp-repack-jars is deactivated until this is fixed.
# See CDH-2151
%define __os_install_post \
    /usr/lib/rpm/redhat/brp-compress ; \
    /usr/lib/rpm/redhat/brp-strip-static-archive %{__strip} ; \
    /usr/lib/rpm/redhat/brp-strip-comment-note %{__strip} %{__objdump} ; \
    /usr/lib/rpm/brp-python-bytecompile ; \
    %{nil}
%define doc_hadoop %{_docdir}/%{name}-%{hadoop_version}
%define alternatives_cmd alternatives
%global initd_dir %{_sysconfdir}/rc.d/init.d
%endif
%if %{?suse_version:1}0
# Only tested on openSUSE 11.4; let's update it for previous releases once confirmed
%if 0%{suse_version} > 1130
%define suse_check \# Define an empty suse_check for compatibility with older sles
%endif
# Deactivating symlinks checks
%define __os_install_post \
    %{suse_check} ; \
    /usr/lib/rpm/brp-compress ; \
    %{nil}
%define doc_hadoop %{_docdir}/%{name}
%define alternatives_cmd update-alternatives
%global initd_dir %{_sysconfdir}/rc.d
%endif
%if 0%{?mgaversion}
%define doc_hadoop %{_docdir}/%{name}-%{hadoop_version}
%define alternatives_cmd update-alternatives
%global initd_dir %{_sysconfdir}/rc.d/init.d
%endif
# Even though we split the RPM into arch and noarch, it will still build and install
# the entirety of hadoop. Defining this tells RPM not to fail the build
# when it notices that we didn't package most of the installed files.
%define _unpackaged_files_terminate_build 0
# RPM searches perl files for dependencies, and this breaks for non-packaged perl libs
# like thrift, so disable it
%define _use_internal_dependency_generator 0
Name: %{hadoop_name}
Version: %{hadoop_version}
Release: %{hadoop_release}
Summary: Hadoop is a software platform for processing vast amounts of data
License: Apache License v2.0
URL: http://hadoop.apache.org/core/
Group: Development/Libraries
Source0: %{name}-%{hadoop_base_version}.tar.gz
Source1: do-component-build
Source2: install_%{name}.sh
Source3: hadoop.default
Source4: hadoop-init.tmpl
Source5: hadoop-init.tmpl.suse
Source6: hadoop.1
Source7: hadoop-fuse-dfs.1
Source8: hadoop-fuse.default
Source9: hdfs.conf
Source10: mapred.conf
Buildroot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
BuildRequires: python >= 2.4, git, fuse-devel, fuse, automake, autoconf
Requires: coreutils, /usr/sbin/useradd, /usr/sbin/usermod, /sbin/chkconfig, /sbin/service, bigtop-utils
Provides: hadoop
%if %{?suse_version:1}0
BuildRequires: libfuse2, libopenssl-devel, gcc-c++, ant, ant-nodeps, ant-trax
# Required for init scripts
Requires: sh-utils, insserv
%endif
# CentOS 5 does not have any dist macro,
# so assume anything that is not Mageia or SUSE is RHEL/CentOS/Fedora
%if %{!?suse_version:1}0 && %{!?mgaversion:1}0
BuildRequires: fuse-libs, libtool, redhat-rpm-config, lzo-devel, openssl-devel
# Required for init scripts
Requires: sh-utils, redhat-lsb
%endif
%if 0%{?mgaversion}
BuildRequires: libfuse-devel, libfuse2, libopenssl-devel, gcc-c++, ant, libtool, automake, autoconf, liblzo-devel, zlib-devel
Requires: chkconfig, xinetd-simple-services, zlib, initscripts
%endif
%description
Hadoop is a software platform that lets one easily write and
run applications that process vast amounts of data.
Here's what makes Hadoop especially useful:
* Scalable: Hadoop can reliably store and process petabytes.
* Economical: It distributes the data and processing across clusters
              of commonly available computers. These clusters can number
              into the thousands of nodes.
* Efficient: By distributing the data, Hadoop can process it in parallel
             on the nodes where the data is located. This makes it
             extremely rapid.
* Reliable: Hadoop automatically maintains multiple copies of data and
            automatically redeploys computing tasks based on failures.
Hadoop implements MapReduce, using the Hadoop Distributed File System (HDFS).
MapReduce divides applications into many small blocks of work. HDFS creates
multiple replicas of data blocks for reliability, placing them on compute
nodes around the cluster. MapReduce can then process the data where it is
located.
%package namenode
Summary: The Hadoop namenode manages the block locations of HDFS files
Group: System/Daemons
Requires: %{name} = %{version}-%{release}
Requires(pre): %{name} = %{version}-%{release}
%description namenode
The Hadoop Distributed Filesystem (HDFS) requires one unique server, the
namenode, which manages the block locations of files on the filesystem.
%package secondarynamenode
Summary: Hadoop Secondary namenode
Group: System/Daemons
Requires: %{name} = %{version}-%{release}
Requires(pre): %{name} = %{version}-%{release}
%description secondarynamenode
The Secondary Name Node periodically compacts the Name Node EditLog
into a checkpoint. This compaction ensures that Name Node restarts
do not incur unnecessary downtime.
%package jobtracker
Summary: Hadoop Job Tracker
Group: System/Daemons
Requires: %{name} = %{version}-%{release}
Requires(pre): %{name} = %{version}-%{release}
%description jobtracker
The jobtracker is a central service which is responsible for managing
the tasktracker services running on all nodes in a Hadoop Cluster.
The jobtracker allocates work to the tasktracker nearest to the data
with an available work slot.
%package datanode
Summary: Hadoop Data Node
Group: System/Daemons
Requires: %{name} = %{version}-%{release}
Requires(pre): %{name} = %{version}-%{release}
%description datanode
The Data Nodes in the Hadoop Cluster are responsible for serving up
blocks of data over the network to Hadoop Distributed Filesystem
(HDFS) clients.
%package tasktracker
Summary: Hadoop Task Tracker
Group: System/Daemons
Requires: %{name} = %{version}-%{release}
Requires(pre): %{name} = %{version}-%{release}
%description tasktracker
The tasktracker has a fixed number of work slots. The jobtracker
assigns MapReduce work to the tasktracker that is nearest the data
with an available work slot.
%package conf-pseudo
Summary: Hadoop installation in pseudo-distributed mode
Group: System/Daemons
Requires: %{name} = %{version}-%{release}, %{name}-namenode = %{version}-%{release}, %{name}-datanode = %{version}-%{release}, %{name}-secondarynamenode = %{version}-%{release}, %{name}-tasktracker = %{version}-%{release}, %{name}-jobtracker = %{version}-%{release}
%description conf-pseudo
Installation of this RPM will set up your machine to run in pseudo-distributed mode,
where each Hadoop daemon runs in a separate Java process.
%package doc
Summary: Hadoop Documentation
Group: Documentation
%description doc
Documentation for Hadoop
%package source
Summary: Source code for Hadoop
Group: System/Daemons
AutoReq: no
%description source
The Java source code for Hadoop and its contributed packages. This is handy when
trying to debug programs that depend on Hadoop.
%package fuse
Summary: Mountable HDFS
Group: Development/Libraries
Requires: %{name} = %{version}-%{release}, fuse
AutoReq: no
%if %{?suse_version:1}0
Requires: libfuse2
%else
Requires: fuse-libs
%endif
%description fuse
This package allows HDFS to be mounted (on most flavors of Unix) as a standard
file system using the mount command. Once mounted, the user can operate on an
instance of HDFS using standard Unix utilities such as 'ls', 'cd', 'cp',
'mkdir', 'find' and 'grep', or use standard POSIX libraries like open, write,
read and close from C, C++, Python, Ruby, Perl, Java, bash, etc.
%package native
Summary: Native libraries for Hadoop Compression
Group: Development/Libraries
Requires: %{name} = %{version}-%{release}
AutoReq: no
%description native
Native libraries for Hadoop compression
%package libhdfs
Summary: Hadoop Filesystem Library
Group: Development/Libraries
Requires: %{name} = %{version}-%{release}
# TODO: reconcile libjvm
AutoReq: no
%description libhdfs
Hadoop Filesystem Library
%package pipes
Summary: Hadoop Pipes Library
Group: Development/Libraries
Requires: %{name} = %{version}-%{release}
%description pipes
Hadoop Pipes Library
%package sbin
Summary: Binaries for secured Hadoop clusters
Group: System/Daemons
Requires: %{name} = %{version}-%{release}
%description sbin
This package contains a setuid program, 'task-controller', which is used for
launching MapReduce tasks in a secured MapReduce cluster. This program allows
the tasks to run as the Unix user who submitted the job, rather than the
Unix user running the MapReduce daemons.
This package also contains 'jsvc', a daemon wrapper necessary to allow
DataNodes to bind to a low (privileged) port and then drop root privileges
before continuing operation.
%prep
%setup -n apache-hadoop-common-7471fab
%build
# This assumes that you installed Java JDK 6 and set JAVA_HOME
# This assumes that you installed Java JDK 5 and set JAVA5_HOME
# This assumes that you installed Forrest and set FORREST_HOME
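# The actual compilation is delegated to the do-component-build script (Source1),
# which receives the version and the native build string via the environment.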
env HADOOP_VERSION=%{hadoop_version} HADOOP_ARCH=%{hadoop_arch} bash %{SOURCE1}
%clean
%__rm -rf $RPM_BUILD_ROOT
#########################
#### INSTALL SECTION ####
#########################
%install
%__rm -rf $RPM_BUILD_ROOT
%__install -d -m 0755 $RPM_BUILD_ROOT/%{lib_hadoop}
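# install_hadoop.sh (Source2) copies the build output into the buildroot and
# lays out the lib, etc, doc, man and source directories defined above.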
bash %{SOURCE2} \
  --distro-dir=$RPM_SOURCE_DIR \
  --build-dir=$PWD/build/%{name}-%{version} \
  --src-dir=$RPM_BUILD_ROOT%{src_hadoop} \
  --lib-dir=$RPM_BUILD_ROOT%{lib_hadoop} \
  --system-lib-dir=%{_libdir} \
  --etc-dir=$RPM_BUILD_ROOT%{etc_hadoop} \
  --prefix=$RPM_BUILD_ROOT \
  --doc-dir=$RPM_BUILD_ROOT%{doc_hadoop} \
  --example-dir=$RPM_BUILD_ROOT%{doc_hadoop}/examples \
  --native-build-string=%{hadoop_arch} \
  --installed-lib-dir=%{lib_hadoop} \
  --man-dir=$RPM_BUILD_ROOT%{man_hadoop}
%__mv -f $RPM_BUILD_ROOT/usr/share/doc/libhdfs-devel $RPM_BUILD_ROOT/%{_docdir}/libhdfs-%{hadoop_version}
# Init.d scripts
%__install -d -m 0755 $RPM_BUILD_ROOT/%{initd_dir}/
%if %{?suse_version:1}0
orig_init_file=$RPM_SOURCE_DIR/hadoop-init.tmpl.suse
%else
orig_init_file=$RPM_SOURCE_DIR/hadoop-init.tmpl
%endif
# Generate the init.d scripts
for service in %{hadoop_services}
do
  init_file=$RPM_BUILD_ROOT/%{initd_dir}/%{name}-${service}
  %__cp $orig_init_file $init_file
  %__sed -i -e 's|@HADOOP_COMMON_ROOT@|%{lib_hadoop}|' $init_file
  %__sed -i -e "s|@HADOOP_DAEMON@|${service}|" $init_file
  %__sed -i -e 's|@HADOOP_CONF_DIR@|%{config_hadoop}|' $init_file
  case "$service" in
    namenode|secondarynamenode|datanode)
      %__sed -i -e 's|@HADOOP_DAEMON_USER@|hdfs|' $init_file
      ;;
    jobtracker|tasktracker)
      %__sed -i -e 's|@HADOOP_DAEMON_USER@|mapred|' $init_file
      ;;
  esac
  chmod 755 $init_file
done
%__install -d -m 0755 $RPM_BUILD_ROOT/etc/default
%__cp $RPM_SOURCE_DIR/hadoop.default $RPM_BUILD_ROOT/etc/default/hadoop
%__cp $RPM_SOURCE_DIR/hadoop-fuse.default $RPM_BUILD_ROOT/etc/default/hadoop-fuse
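# Per-user limits files (Source9 and Source10), presumably raising resource
# limits such as the open-file count for the hdfs and mapred users.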
%__install -d -m 0755 $RPM_BUILD_ROOT/etc/security/limits.d
%__install -m 0644 %{SOURCE9} $RPM_BUILD_ROOT/etc/security/limits.d/hdfs.conf
%__install -m 0644 %{SOURCE10} $RPM_BUILD_ROOT/etc/security/limits.d/mapred.conf
# /var/lib/hadoop/cache
%__install -d -m 1777 $RPM_BUILD_ROOT/var/lib/%{name}/cache
# /var/log/hadoop
%__install -d -m 0755 $RPM_BUILD_ROOT/var/log
%__install -d -m 0775 $RPM_BUILD_ROOT/var/run/%{name}
%__install -d -m 0775 $RPM_BUILD_ROOT/%{log_hadoop}
%pre
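# Create the hadoop, hdfs and mapred groups, plus the hdfs and mapred system
# users, if they do not already exist.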
getent group hadoop >/dev/null || groupadd -r hadoop
getent group hdfs >/dev/null || groupadd -r hdfs
getent group mapred >/dev/null || groupadd -r mapred
getent passwd mapred >/dev/null || /usr/sbin/useradd --comment "Hadoop MapReduce" --shell /bin/bash -M -r -g mapred -G hadoop --home %{lib_hadoop} mapred
# Create an hdfs user if one does not already exist.
getent passwd hdfs >/dev/null || /usr/sbin/useradd --comment "Hadoop HDFS" --shell /bin/bash -M -r -g hdfs -G hadoop --home %{lib_hadoop} hdfs
%post
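# Register the empty configuration directory and the hadoop client with the
# alternatives system; the slave links keep the log, lib, etc and man paths in sync.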
%{alternatives_cmd} --install %{config_hadoop} %{name}-conf %{etc_hadoop}/conf.empty 10
%{alternatives_cmd} --install %{bin_hadoop}/%{hadoop_name} %{hadoop_name}-default %{bin_hadoop}/%{name} 20 \
--slave %{log_hadoop_dirname}/%{hadoop_name} %{hadoop_name}-log %{log_hadoop} \
--slave %{lib_hadoop_dirname}/%{hadoop_name} %{hadoop_name}-lib %{lib_hadoop} \
--slave /etc/%{hadoop_name} %{hadoop_name}-etc %{etc_hadoop} \
--slave %{man_hadoop}/man1/%{hadoop_name}.1.*z %{hadoop_name}-man %{man_hadoop}/man1/%{name}.1.*z
%preun
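# On final removal only ($1 = 0): stop any running daemons and unregister the
# alternatives entries created in the post-install scriptlet.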
if [ "$1" = 0 ]; then
# Stop any services that might be running
for service in %{hadoop_services}
do
service hadoop-$service stop 1>/dev/null 2>/dev/null || :
done
%{alternatives_cmd} --remove %{name}-conf %{etc_hadoop}/conf.empty || :
%{alternatives_cmd} --remove %{hadoop_name}-default %{bin_hadoop}/%{name} || :
fi
%files
%defattr(-,root,root)
%config(noreplace) %{etc_hadoop}/conf.empty
%config(noreplace) /etc/default/hadoop
%config(noreplace) /etc/security/limits.d/hdfs.conf
%config(noreplace) /etc/security/limits.d/mapred.conf
%{lib_hadoop}
%{bin_hadoop}/%{name}
%{man_hadoop}/man1/hadoop.1.*z
%attr(0775,root,hadoop) /var/run/%{name}
%attr(0775,root,hadoop) %{log_hadoop}
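# The paths below are excluded from the base package because they are shipped
# by the native, sbin and fuse sub-packages instead.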
%exclude %{lib_hadoop}/lib/native
%exclude %{lib_hadoop}/sbin/%{hadoop_arch}
%exclude %{lib_hadoop}/bin/fuse_dfs
# FIXME: The following is a workaround for BIGTOP-139
%exclude %{lib_hadoop}/bin/task-controller
%exclude %{lib_hadoop}/libexec/jsvc*
%files doc
%defattr(-,root,root)
%doc %{doc_hadoop}
%files source
%defattr(-,root,root)
%{src_hadoop}
# Service file management RPMs
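# This macro generates the files list and the post-install, pre-uninstall and
# post-uninstall scriptlets for each per-daemon service package: chkconfig
# registration on install, stop and deregistration on removal, condrestart on upgrade.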
%define service_macro() \
%files %1 \
%defattr(-,root,root) \
%{initd_dir}/%{name}-%1 \
%post %1 \
chkconfig --add %{name}-%1 \
\
%preun %1 \
if [ $1 = 0 ]; then \
  service %{name}-%1 stop > /dev/null 2>&1 \
  chkconfig --del %{name}-%1 \
fi \
%postun %1 \
if [ $1 -ge 1 ]; then \
  service %{name}-%1 condrestart >/dev/null 2>&1 \
fi
%service_macro namenode
%service_macro secondarynamenode
%service_macro datanode
%service_macro jobtracker
%service_macro tasktracker
# Pseudo-distributed Hadoop installation
%post conf-pseudo
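# conf.pseudo is registered at priority 30 so that it takes precedence over the
# default conf.empty, which is installed at priority 10.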
%{alternatives_cmd} --install %{config_hadoop} %{name}-conf %{etc_hadoop}/conf.pseudo 30
%files conf-pseudo
%defattr(-,root,root)
%config(noreplace) %attr(755,root,root) %{etc_hadoop}/conf.pseudo
%dir %attr(0755,root,hadoop) /var/lib/%{name}
%dir %attr(1777,root,hadoop) /var/lib/%{name}/cache
%preun conf-pseudo
if [ "$1" = 0 ]; then
%{alternatives_cmd} --remove %{name}-conf %{etc_hadoop}/conf.pseudo
rm -f %{etc_hadoop}/conf
fi
%files native
%defattr(-,root,root)
%{lib_hadoop}/lib/native
%files fuse
%defattr(-,root,root)
%config(noreplace) /etc/default/hadoop-fuse
%attr(0755,root,root) %{lib_hadoop}/bin/fuse_dfs
%attr(0755,root,root) %{lib_hadoop}/bin/fuse_dfs_wrapper.sh
%attr(0755,root,root) %{bin_hadoop}/hadoop-fuse-dfs
%attr(0644,root,root) %{man_hadoop}/man1/hadoop-fuse-dfs.1.*
%files pipes
%defattr(-,root,root)
%{_libdir}/libhadooppipes*
%{_libdir}/libhadooputil*
%{_includedir}/hadoop/*
%files libhdfs
%defattr(-,root,root)
%{_libdir}/libhdfs*
%{_includedir}/hdfs.h
# -devel should be its own package
%doc %{_docdir}/libhdfs-%{hadoop_version}
%files sbin
%defattr(-,root,root)
%dir %{lib_hadoop}/sbin
%dir %{lib_hadoop}/sbin/%{hadoop_arch}
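# task-controller is setuid root and only executable by root and the mapred
# group, hence mode 4750.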
%attr(4750,root,mapred) %{lib_hadoop}/sbin/%{hadoop_arch}/task-controller
%attr(0755,root,root) %{lib_hadoop}/sbin/%{hadoop_arch}/jsvc
# FIXME: The following is a workaround for BIGTOP-139
%attr(4750,root,mapred) %{lib_hadoop}/bin/task-controller
%attr(0755,root,root) %{lib_hadoop}/libexec/jsvc*