# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Hadoop RPM spec file
#
| |
# FIXME: we need to disable some stricter checks on native files for now,
# since the Hadoop build system makes it difficult to pass the kind of flags
# that would make newer RPM debuginfo generation scripts happy.
%undefine _missing_build_ids_terminate_build

# Package name and filesystem layout macros.
# NOTE(review): hadoop_version, hadoop_base_version and hadoop_release are
# referenced throughout this spec but never defined here -- they appear to be
# supplied externally (e.g. via rpmbuild --define); TODO confirm against the
# build driver.
%define hadoop_name hadoop
%define etc_hadoop /etc/%{name}
%define config_hadoop %{etc_hadoop}/conf
%define lib_hadoop_dirname /usr/lib
%define lib_hadoop %{lib_hadoop_dirname}/%{name}
%define log_hadoop_dirname /var/log
%define log_hadoop %{log_hadoop_dirname}/%{name}
%define bin_hadoop %{_bindir}
%define man_hadoop %{_mandir}
%define src_hadoop /usr/src/%{name}
# NOTE(review): hadoop_username appears unused in this spec (the pre scriptlet
# hard-codes mapred/hdfs) -- verify before removing.
%define hadoop_username mapred
# One init.d script per service is generated in the install section.
%define hadoop_services namenode secondarynamenode datanode jobtracker tasktracker
# Hadoop outputs built binaries into the directory named by hadoop_build_path.
%define hadoop_build_path build
%define static_images_dir src/webapps/static/images

# Map the RPM target architecture onto Hadoop's native-build directory name.
# Any architecture other than i386 / amd64 / x86_64 leaves hadoop_arch
# undefined, which will break the native/sbin file lists below.
%ifarch i386
%global hadoop_arch Linux-i386-32
%endif
%ifarch amd64 x86_64
%global hadoop_arch Linux-amd64-64
%endif
| |
# CentOS 5 does not have any dist macro,
# so assume anything that is not Mageia or SUSE is RHEL/CentOS/Fedora.
%if %{!?suse_version:1}0 && %{!?mgaversion:1}0
# brp-repack-jars uses unzip to expand jar files.
# Unfortunately aspectjtools-1.6.5.jar pulled by ivy contains some files and
# directories without any read permission, which makes the whole process fail.
# So for now brp-repack-jars is deactivated (omitted from the list below)
# until this is fixed. See CDH-2151.
%define __os_install_post \
    /usr/lib/rpm/redhat/brp-compress ; \
    /usr/lib/rpm/redhat/brp-strip-static-archive %{__strip} ; \
    /usr/lib/rpm/redhat/brp-strip-comment-note %{__strip} %{__objdump} ; \
    /usr/lib/rpm/brp-python-bytecompile ; \
    %{nil}

%define doc_hadoop %{_docdir}/%{name}-%{hadoop_version}
%define alternatives_cmd alternatives
%global initd_dir %{_sysconfdir}/rc.d/init.d
%endif


%if %{?suse_version:1}0

# Only tested on openSUSE 11.4; let's update it for previous releases when confirmed.
# NOTE(review): suse_check is only defined when suse_version > 1130 but is
# referenced unconditionally in __os_install_post below -- on older SLES the
# reference would be left unexpanded; TODO confirm intended behavior there.
%if 0%{suse_version} > 1130
%define suse_check \# Define an empty suse_check for compatibility with older sles
%endif

# Deactivating symlink checks.
%define __os_install_post \
    %{suse_check} ; \
    /usr/lib/rpm/brp-compress ; \
    %{nil}

%define doc_hadoop %{_docdir}/%{name}
%define alternatives_cmd update-alternatives
%global initd_dir %{_sysconfdir}/rc.d
%endif

%if 0%{?mgaversion}
%define doc_hadoop %{_docdir}/%{name}-%{hadoop_version}
%define alternatives_cmd update-alternatives
%global initd_dir %{_sysconfdir}/rc.d/init.d
%endif


# Even though we split the RPM into arch and noarch, it still will build and
# install the entirety of hadoop. Defining this tells RPM not to fail the build
# when it notices that we didn't package most of the installed files.
%define _unpackaged_files_terminate_build 0

# RPM scans perl files for dependencies and this breaks for non-packaged perl
# libs like thrift, so disable the internal dependency generator.
%define _use_internal_dependency_generator 0
| |
# Main package metadata. Version and Release come from externally supplied
# macros (hadoop_version / hadoop_release); Source0 is the upstream tarball
# and the remaining sources are packaging helpers shipped alongside this spec.
Name: %{hadoop_name}
Version: %{hadoop_version}
Release: %{hadoop_release}
Summary: Hadoop is a software platform for processing vast amounts of data
License: Apache License v2.0
URL: http://hadoop.apache.org/core/
Group: Development/Libraries
Source0: %{name}-%{hadoop_base_version}.tar.gz
Source1: do-component-build
Source2: install_%{name}.sh
Source3: hadoop.default
Source4: hadoop-init.tmpl
Source5: hadoop-init.tmpl.suse
Source6: hadoop.1
Source7: hadoop-fuse-dfs.1
Source8: hadoop-fuse.default
Source9: hdfs.conf
Source10: mapred.conf
Buildroot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
BuildRequires: python >= 2.4, git, fuse-devel,fuse, automake, autoconf
Requires: coreutils, /usr/sbin/useradd, /usr/sbin/usermod, /sbin/chkconfig, /sbin/service, bigtop-utils
# NOTE(review): explicit self-Provides is redundant with Name but harmless.
Provides: hadoop

# Distro-specific build and runtime dependencies follow.
%if %{?suse_version:1}0
BuildRequires: libfuse2, libopenssl-devel, gcc-c++, ant, ant-nodeps, ant-trax
# Required for init scripts
Requires: sh-utils, insserv
%endif

# CentOS 5 does not have any dist macro,
# so assume anything that is not Mageia or SUSE is RHEL/CentOS/Fedora.
%if %{!?suse_version:1}0 && %{!?mgaversion:1}0
BuildRequires: fuse-libs, libtool, redhat-rpm-config, lzo-devel, openssl-devel
# Required for init scripts
Requires: sh-utils, redhat-lsb
%endif

%if 0%{?mgaversion}
BuildRequires: libfuse-devel, libfuse2 , libopenssl-devel, gcc-c++, ant, libtool, automake, autoconf, liblzo-devel, zlib-devel
Requires: chkconfig, xinetd-simple-services, zlib, initscripts
%endif
| |
| |
# Base-package description. The body below is user-visible package metadata;
# spec comments must not be placed inside it or they would leak into the text.
%description
Hadoop is a software platform that lets one easily write and
run applications that process vast amounts of data.

Here's what makes Hadoop especially useful:
* Scalable: Hadoop can reliably store and process petabytes.
* Economical: It distributes the data and processing across clusters
              of commonly available computers. These clusters can number
              into the thousands of nodes.
* Efficient: By distributing the data, Hadoop can process it in parallel
             on the nodes where the data is located. This makes it
             extremely rapid.
* Reliable: Hadoop automatically maintains multiple copies of data and
            automatically redeploys computing tasks based on failures.

Hadoop implements MapReduce, using the Hadoop Distributed File System (HDFS).
MapReduce divides applications into many small blocks of work. HDFS creates
multiple replicas of data blocks for reliability, placing them on compute
nodes around the cluster. MapReduce can then process the data where it is
located.
| |
| |
%package namenode
# The daemon subpackages below carry only metadata plus a generated init
# script (see service_macro near the end of this spec); the actual binaries
# live in the base package, hence the exact-version Requires on it.
Summary: The Hadoop namenode manages the block locations of HDFS files
Group: System/Daemons
Requires: %{name} = %{version}-%{release}
Requires(pre): %{name} = %{version}-%{release}

%description namenode
The Hadoop Distributed Filesystem (HDFS) requires one unique server, the
namenode, which manages the block locations of files on the filesystem.


%package secondarynamenode
Summary: Hadoop Secondary namenode
Group: System/Daemons
Requires: %{name} = %{version}-%{release}
Requires(pre): %{name} = %{version}-%{release}

%description secondarynamenode
The Secondary Name Node periodically compacts the Name Node EditLog
into a checkpoint. This compaction ensures that Name Node restarts
do not incur unnecessary downtime.


%package jobtracker
Summary: Hadoop Job Tracker
Group: System/Daemons
Requires: %{name} = %{version}-%{release}
Requires(pre): %{name} = %{version}-%{release}

%description jobtracker
The jobtracker is a central service which is responsible for managing
the tasktracker services running on all nodes in a Hadoop Cluster.
The jobtracker allocates work to the tasktracker nearest to the data
with an available work slot.


%package datanode
Summary: Hadoop Data Node
Group: System/Daemons
Requires: %{name} = %{version}-%{release}
Requires(pre): %{name} = %{version}-%{release}

%description datanode
The Data Nodes in the Hadoop Cluster are responsible for serving up
blocks of data over the network to Hadoop Distributed Filesystem
(HDFS) clients.


%package tasktracker
Summary: Hadoop Task Tracker
Group: System/Daemons
Requires: %{name} = %{version}-%{release}
Requires(pre): %{name} = %{version}-%{release}

%description tasktracker
The tasktracker has a fixed number of work slots. The jobtracker
assigns MapReduce work to the tasktracker that is nearest the data
with an available work slot.


%package conf-pseudo
# Meta-package: pulls in every daemon subpackage at the exact same
# version/release and ships the conf.pseudo configuration.
Summary: Hadoop installation in pseudo-distributed mode
Group: System/Daemons
Requires: %{name} = %{version}-%{release}, %{name}-namenode = %{version}-%{release}, %{name}-datanode = %{version}-%{release}, %{name}-secondarynamenode = %{version}-%{release}, %{name}-tasktracker = %{version}-%{release}, %{name}-jobtracker = %{version}-%{release}

%description conf-pseudo
Installation of this RPM will setup your machine to run in pseudo-distributed mode
where each Hadoop daemon runs in a separate Java process.

%package doc
Summary: Hadoop Documentation
Group: Documentation
%description doc
Documentation for Hadoop

%package source
Summary: Source code for Hadoop
Group: System/Daemons
# AutoReq off: keep RPM from scanning the shipped sources for dependencies.
AutoReq: no

%description source
The Java source code for Hadoop and its contributed packages. This is handy when
trying to debug programs that depend on Hadoop.

%package fuse
Summary: Mountable HDFS
Group: Development/Libraries
Requires: %{name} = %{version}-%{release}, fuse
AutoReq: no

# The FUSE runtime library package is named differently on SUSE vs RHEL-family.
%if %{?suse_version:1}0
Requires: libfuse2
%else
Requires: fuse-libs
%endif


%description fuse
These projects (enumerated below) allow HDFS to be mounted (on most flavors of Unix) as a standard file system using the mount command. Once mounted, the user can operate on an instance of hdfs using standard Unix utilities such as 'ls', 'cd', 'cp', 'mkdir', 'find', 'grep', or use standard Posix libraries like open, write, read, close from C, C++, Python, Ruby, Perl, Java, bash, etc.

%package native
Summary: Native libraries for Hadoop Compression
Group: Development/Libraries
Requires: %{name} = %{version}-%{release}
AutoReq: no

%description native
Native libraries for Hadoop compression

%package libhdfs
Summary: Hadoop Filesystem Library
Group: Development/Libraries
Requires: %{name} = %{version}-%{release}
# TODO: reconcile libjvm
AutoReq: no

%description libhdfs
Hadoop Filesystem Library

%package pipes
Summary: Hadoop Pipes Library
Group: Development/Libraries
Requires: %{name} = %{version}-%{release}

%description pipes
Hadoop Pipes Library

%package sbin
Summary: Binaries for secured Hadoop clusters
Group: System/Daemons
Requires: %{name} = %{version}-%{release}

%description sbin
This package contains a setuid program, 'task-controller', which is used for
launching MapReduce tasks in a secured MapReduce cluster. This program allows
the tasks to run as the Unix user who submitted the job, rather than the
Unix user running the MapReduce daemons.
This package also contains 'jsvc', a daemon wrapper necessary to allow
DataNodes to bind to a low (privileged) port and then drop root privileges
before continuing operation.
| |
%prep
# The -n value must match the top-level directory inside the Source0 tarball.
%setup -n apache-hadoop-common-7471fab

%build
# This assumes that you installed Java JDK 6 and set JAVA_HOME
# This assumes that you installed Java JDK 5 and set JAVA5_HOME
# This assumes that you installed Forrest and set FORREST_HOME
# The actual build logic lives in the do-component-build script (Source1).
env HADOOP_VERSION=%{hadoop_version} HADOOP_ARCH=%{hadoop_arch} bash %{SOURCE1}

%clean
%__rm -rf $RPM_BUILD_ROOT
| |
#########################
#### INSTALL SECTION ####
#########################
%install
%__rm -rf $RPM_BUILD_ROOT

%__install -d -m 0755 $RPM_BUILD_ROOT/%{lib_hadoop}

# Delegate the bulk of the installation to the shared install script
# (Source2), which populates the buildroot from the build output.
# NOTE: the last argument must not carry a trailing backslash -- the
# previous continuation into a blank line was fragile.
bash %{SOURCE2} \
  --distro-dir=$RPM_SOURCE_DIR \
  --build-dir=$PWD/build/%{name}-%{version} \
  --src-dir=$RPM_BUILD_ROOT%{src_hadoop} \
  --lib-dir=$RPM_BUILD_ROOT%{lib_hadoop} \
  --system-lib-dir=%{_libdir} \
  --etc-dir=$RPM_BUILD_ROOT%{etc_hadoop} \
  --prefix=$RPM_BUILD_ROOT \
  --doc-dir=$RPM_BUILD_ROOT%{doc_hadoop} \
  --example-dir=$RPM_BUILD_ROOT%{doc_hadoop}/examples \
  --native-build-string=%{hadoop_arch} \
  --installed-lib-dir=%{lib_hadoop} \
  --man-dir=$RPM_BUILD_ROOT%{man_hadoop}

# Move the libhdfs devel docs to a versioned docdir location.
%__mv -f $RPM_BUILD_ROOT/usr/share/doc/libhdfs-devel $RPM_BUILD_ROOT/%{_docdir}/libhdfs-%{hadoop_version}

# Init.d scripts
%__install -d -m 0755 $RPM_BUILD_ROOT/%{initd_dir}/

# SUSE init scripts are generated from a different template.
%if %{?suse_version:1}0
orig_init_file=$RPM_SOURCE_DIR/hadoop-init.tmpl.suse
%else
orig_init_file=$RPM_SOURCE_DIR/hadoop-init.tmpl
%endif

# Generate one init.d script per Hadoop service from the template,
# substituting the install root, daemon name, conf dir and run-as user.
for service in %{hadoop_services}
do
    init_file=$RPM_BUILD_ROOT/%{initd_dir}/%{name}-${service}
    %__cp $orig_init_file $init_file
    %__sed -i -e 's|@HADOOP_COMMON_ROOT@|%{lib_hadoop}|' $init_file
    %__sed -i -e "s|@HADOOP_DAEMON@|${service}|" $init_file
    %__sed -i -e 's|@HADOOP_CONF_DIR@|%{config_hadoop}|' $init_file

    # HDFS daemons run as hdfs; MapReduce daemons run as mapred.
    # (The bogus "hadoop_services" alternative previously listed here could
    # never match any value of $service and has been removed.)
    case "$service" in
      namenode|secondarynamenode|datanode)
        %__sed -i -e 's|@HADOOP_DAEMON_USER@|hdfs|' $init_file
        ;;
      jobtracker|tasktracker)
        %__sed -i -e 's|@HADOOP_DAEMON_USER@|mapred|' $init_file
        ;;
    esac

    chmod 755 $init_file
done

# Default environment files and per-user resource limits.
%__install -d -m 0755 $RPM_BUILD_ROOT/etc/default
%__cp $RPM_SOURCE_DIR/hadoop.default $RPM_BUILD_ROOT/etc/default/hadoop
%__cp $RPM_SOURCE_DIR/hadoop-fuse.default $RPM_BUILD_ROOT/etc/default/hadoop-fuse

%__install -d -m 0755 $RPM_BUILD_ROOT/etc/security/limits.d
%__install -m 0644 %{SOURCE9} $RPM_BUILD_ROOT/etc/security/limits.d/hdfs.conf
%__install -m 0644 %{SOURCE10} $RPM_BUILD_ROOT/etc/security/limits.d/mapred.conf

# /var/lib/hadoop/cache (sticky, world-writable like /tmp)
%__install -d -m 1777 $RPM_BUILD_ROOT/var/lib/%{name}/cache
# /var/log/hadoop
%__install -d -m 0755 $RPM_BUILD_ROOT/var/log
%__install -d -m 0775 $RPM_BUILD_ROOT/var/run/%{name}
%__install -d -m 0775 $RPM_BUILD_ROOT/%{log_hadoop}
| |
%pre
# Create system groups (idempotent: getent checks first; -r = system account).
# The shared hadoop group is a supplementary group for both daemon users.
getent group hadoop >/dev/null || groupadd -r hadoop
getent group hdfs >/dev/null || groupadd -r hdfs
getent group mapred >/dev/null || groupadd -r mapred

# Create a mapred user if one does not already exist (-M = no home creation).
getent passwd mapred >/dev/null || /usr/sbin/useradd --comment "Hadoop MapReduce" --shell /bin/bash -M -r -g mapred -G hadoop --home %{lib_hadoop} mapred

# Create an hdfs user if one does not already exist.
getent passwd hdfs >/dev/null || /usr/sbin/useradd --comment "Hadoop HDFS" --shell /bin/bash -M -r -g hdfs -G hadoop --home %{lib_hadoop} hdfs
| |
| |
%post
# Register the empty config dir as a low-priority (10) alternative for the
# hadoop configuration, then register this installation as the default
# hadoop (priority 20) with slave links for the log dir, lib dir, etc dir
# and the man page.
%{alternatives_cmd} --install %{config_hadoop} %{name}-conf %{etc_hadoop}/conf.empty 10
%{alternatives_cmd} --install %{bin_hadoop}/%{hadoop_name} %{hadoop_name}-default %{bin_hadoop}/%{name} 20 \
    --slave %{log_hadoop_dirname}/%{hadoop_name} %{hadoop_name}-log %{log_hadoop} \
    --slave %{lib_hadoop_dirname}/%{hadoop_name} %{hadoop_name}-lib %{lib_hadoop} \
    --slave /etc/%{hadoop_name} %{hadoop_name}-etc %{etc_hadoop} \
    --slave %{man_hadoop}/man1/%{hadoop_name}.1.*z %{hadoop_name}-man %{man_hadoop}/man1/%{name}.1.*z
| |
| |
%preun
# "$1" = 0 means final removal (not an upgrade): stop any running services
# and drop the alternatives entries. All failures are swallowed (|| :) so
# uninstallation never aborts part-way.
if [ "$1" = 0 ]; then
  # Stop any services that might be running
  for service in %{hadoop_services}
  do
    service hadoop-$service stop 1>/dev/null 2>/dev/null || :
  done
  %{alternatives_cmd} --remove %{name}-conf %{etc_hadoop}/conf.empty || :
  %{alternatives_cmd} --remove %{hadoop_name}-default %{bin_hadoop}/%{name} || :
fi
| |
%files
%defattr(-,root,root)
%config(noreplace) %{etc_hadoop}/conf.empty
%config(noreplace) /etc/default/hadoop
%config(noreplace) /etc/security/limits.d/hdfs.conf
%config(noreplace) /etc/security/limits.d/mapred.conf
%{lib_hadoop}
%{bin_hadoop}/%{name}
%{man_hadoop}/man1/hadoop.1.*z
%attr(0775,root,hadoop) /var/run/%{name}
%attr(0775,root,hadoop) %{log_hadoop}

# These pieces are shipped by the native, sbin and fuse subpackages instead.
%exclude %{lib_hadoop}/lib/native
%exclude %{lib_hadoop}/sbin/%{hadoop_arch}
%exclude %{lib_hadoop}/bin/fuse_dfs
# FIXME: The following is a workaround for BIGTOP-139
%exclude %{lib_hadoop}/bin/task-controller
%exclude %{lib_hadoop}/libexec/jsvc*

%files doc
%defattr(-,root,root)
%doc %{doc_hadoop}

%files source
%defattr(-,root,root)
%{src_hadoop}
| |
| |
| |
# Service file management RPMs.
# For each daemon subpackage this macro expands to: its %%files list (just
# the generated init script), a %%post that registers the script with
# chkconfig, a %%preun that stops and deregisters the service on final
# removal, and a %%postun that condrestarts it on upgrade. Percent signs
# are doubled in this comment because rpm expands macros even in comments;
# no comments may appear inside the backslash-continued macro body below.
%define service_macro() \
%files %1 \
%defattr(-,root,root) \
%{initd_dir}/%{name}-%1 \
%post %1 \
chkconfig --add %{name}-%1 \
\
%preun %1 \
if [ $1 = 0 ]; then \
  service %{name}-%1 stop > /dev/null 2>&1 \
  chkconfig --del %{name}-%1 \
fi \
%postun %1 \
if [ $1 -ge 1 ]; then \
  service %{name}-%1 condrestart >/dev/null 2>&1 \
fi
%service_macro namenode
%service_macro secondarynamenode
%service_macro datanode
%service_macro jobtracker
%service_macro tasktracker
| |
# Pseudo-distributed Hadoop installation
%post conf-pseudo
# Priority 30 outranks conf.empty (10), so installing this package makes the
# pseudo-distributed configuration the active alternative automatically.
%{alternatives_cmd} --install %{config_hadoop} %{name}-conf %{etc_hadoop}/conf.pseudo 30


%files conf-pseudo
%defattr(-,root,root)
%config(noreplace) %attr(755,root,root) %{etc_hadoop}/conf.pseudo
%dir %attr(0755,root,hadoop) /var/lib/%{name}
%dir %attr(1777,root,hadoop) /var/lib/%{name}/cache

%preun conf-pseudo
# On final removal, drop the alternative and the conf symlink it managed.
if [ "$1" = 0 ]; then
  %{alternatives_cmd} --remove %{name}-conf %{etc_hadoop}/conf.pseudo
  rm -f %{etc_hadoop}/conf
fi
| |
%files native
%defattr(-,root,root)
# Native compression libraries; excluded from the base package's file list.
%{lib_hadoop}/lib/native
| |
%files fuse
%defattr(-,root,root)
# NOTE: /etc/default/hadoop-fuse was previously listed twice in this section,
# causing RPM's "File listed twice" warning; the duplicate entry was removed.
%config(noreplace) /etc/default/hadoop-fuse
%attr(0755,root,root) %{lib_hadoop}/bin/fuse_dfs
%attr(0755,root,root) %{lib_hadoop}/bin/fuse_dfs_wrapper.sh
%attr(0755,root,root) %{bin_hadoop}/hadoop-fuse-dfs
%attr(0644,root,root) %{man_hadoop}/man1/hadoop-fuse-dfs.1.*
| |
%files pipes
%defattr(-,root,root)
# Pipes/util shared libraries and the C++ headers.
%{_libdir}/libhadooppipes*
%{_libdir}/libhadooputil*
%{_includedir}/hadoop/*

%files libhdfs
%defattr(-,root,root)
%{_libdir}/libhdfs*
%{_includedir}/hdfs.h
# -devel should be its own package
%doc %{_docdir}/libhdfs-%{hadoop_version}
| |
%files sbin
%defattr(-,root,root)
%dir %{lib_hadoop}/sbin
%dir %{lib_hadoop}/sbin/%{hadoop_arch}
# task-controller is setuid root, executable only by group mapred (mode 4750).
%attr(4750,root,mapred) %{lib_hadoop}/sbin/%{hadoop_arch}/task-controller
%attr(0755,root,root) %{lib_hadoop}/sbin/%{hadoop_arch}/jsvc

# FIXME: The following is a workaround for BIGTOP-139
%attr(4750,root,mapred) %{lib_hadoop}/bin/task-controller
%attr(0755,root,root) %{lib_hadoop}/libexec/jsvc*