<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
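<!--
Package manifest: each element under <packages> names one package and declares its
metadata (summary, description, url), plus optional deps, alternatives, and groups
sections describing runtime dependencies, managed configuration symlinks, and the
system group/user the package uses.
-->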
<packages>
<bigtop-utils>
<metadata>
<summary>Collection of useful tools for Bigtop</summary>
<description>This includes a collection of useful tools and files for Bigtop</description>
<url>http://bigtop.apache.org/</url>
</metadata>
</bigtop-utils>
<bigtop-jsvc>
<metadata>
<summary>Application to launch Java daemons</summary>
<description>jsvc executes a classfile that implements the Daemon interface.</description>
<url>http://commons.apache.org/daemon/</url>
</metadata>
</bigtop-jsvc>
<bigtop-groovy>
<metadata>
<summary>An agile and dynamic language for the Java Virtual Machine</summary>
<description>Groovy provides a JVM-based runtime environment for functional programming and scripting.</description>
<url>http://groovy.codehaus.org/</url>
</metadata>
<deps>
<bigtop-utils/>
</deps>
</bigtop-groovy>
<bigtop-tomcat>
<metadata>
<summary>Apache Tomcat</summary>
<description>Apache Tomcat is an open source software implementation of the
Java Servlet and JavaServer Pages technologies.</description>
<url>http://tomcat.apache.org/</url>
</metadata>
</bigtop-tomcat>
<spark-core>
<metadata>
<summary>Lightning-Fast Cluster Computing</summary>
<description>Spark is a MapReduce-like cluster computing framework designed to support
low-latency iterative jobs and interactive use from an interpreter. It is
written in Scala, a high-level language for the JVM, and exposes a clean
language-integrated syntax that makes it easy to write parallel jobs.
Spark can run on Apache Mesos, Hadoop YARN, or in standalone mode.</description>
<url>http://spark.apache.org/</url>
</metadata>
<deps>
<bigtop-utils/>
</deps>
<alternatives>
<spark-conf>
<status>auto</status>
<link>/etc/spark/conf</link>
<value>/etc/spark/conf.dist</value>
<alt>/etc/spark/conf.dist</alt>
</spark-conf>
</alternatives>
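<!--
Note: an alternatives entry like the one above presumably maps onto the Linux
alternatives system: <link> is the generic path managed as a symlink, <value>/<alt>
is the concrete configuration directory it resolves to, and status "auto" lets the
highest-priority candidate be selected automatically.
-->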
</spark-core>
<spark-master>
<metadata>
<summary>Server for Spark master</summary>
<description>Server for Spark master</description>
<url>http://spark.apache.org/</url>
</metadata>
<deps>
<spark>/self</spark>
</deps>
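<!--
Note: a dependency written as <pkg>/self</pkg> (here, spark) appears to require the
dependency at the same version as the declaring package, while an empty element such
as <bigtop-utils/> expresses an unversioned dependency.
-->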
</spark-master>
<spark-worker>
<metadata>
<summary>Server for Spark worker</summary>
<description>Server for Spark worker</description>
<url>http://spark.apache.org/</url>
</metadata>
<deps>
<spark>/self</spark>
</deps>
</spark-worker>
<spark-python>
<metadata>
<summary>Python client for Spark</summary>
<description>Includes PySpark, an interactive Python shell for Spark, and related libraries</description>
<url>http://spark.apache.org/</url>
</metadata>
<deps>
<spark>/self</spark>
</deps>
</spark-python>
<solr>
<metadata>
<summary>Apache Solr is the popular, blazing fast open source enterprise search platform</summary>
<description>Solr is the popular, blazing fast open source enterprise search platform from
the Apache Lucene project. Its major features include powerful full-text
search, hit highlighting, faceted search, dynamic clustering, database
integration, rich document (e.g., Word, PDF) handling, and geospatial search.
Solr is highly scalable, providing distributed search and index replication,
and it powers the search and navigation features of many of the world's
largest internet sites.
Solr is written in Java and runs as a standalone full-text search server within
a servlet container such as Tomcat. Solr uses the Lucene Java search library at
its core for full-text indexing and search, and has REST-like HTTP/XML and JSON
APIs that make it easy to use from virtually any programming language. Solr's
powerful external configuration allows it to be tailored to almost any type of
application without Java coding, and it has an extensive plugin architecture
when more advanced customization is required.</description>
<url>http://lucene.apache.org/solr</url>
</metadata>
<deps>
<bigtop-tomcat/>
<bigtop-utils/>
</deps>
<alternatives>
</alternatives>
</solr>
<solr-doc>
<metadata>
<summary>Documentation for Apache Solr</summary>
<description>Documentation for Apache Solr</description>
<url>http://lucene.apache.org/solr</url>
</metadata>
<deps>
</deps>
<alternatives>
</alternatives>
</solr-doc>
<solr-server>
<metadata>
<summary>The Solr server</summary>
<description>This package starts the Solr server on startup</description>
<url>http://lucene.apache.org/solr</url>
</metadata>
<deps>
</deps>
<alternatives>
</alternatives>
</solr-server>
<sqoop>
<metadata>
<summary>Tool for easy imports and exports of data sets between databases and the Hadoop ecosystem</summary>
<description>Sqoop is a tool that provides the ability to import and export data sets between the Hadoop Distributed File System (HDFS) and relational databases.</description>
<url>http://sqoop.apache.org</url>
</metadata>
<deps>
<bigtop-tomcat/>
<bigtop-utils/>
<hadoop-client/>
<sqoop-client>/self</sqoop-client>
</deps>
<alternatives>
<sqoop-conf>
<status>auto</status>
<link>/etc/sqoop/conf</link>
<value>/etc/sqoop/conf.dist</value>
<alt>/etc/sqoop/conf.dist</alt>
</sqoop-conf>
</alternatives>
<groups>
<sqoop>
<user>sqoop</user>
</sqoop>
</groups>
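<!--
Note: a groups entry presumably declares the system group the package creates along
with the service user placed in it (here, group "sqoop" with user "sqoop").
-->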
</sqoop>
<sqoop-server>
<metadata>
<summary>Server for Sqoop.</summary>
<description>Centralized server for Sqoop.</description>
<url>http://sqoop.apache.org</url>
</metadata>
<deps>
<sqoop>/self</sqoop>
</deps>
</sqoop-server>
<sqoop-client>
<metadata>
<summary>Client for Sqoop.</summary>
<description>Lightweight client for Sqoop.</description>
<url>http://sqoop.apache.org</url>
</metadata>
</sqoop-client>
<oozie>
<metadata>
<summary>Oozie is a system that runs workflows of Hadoop jobs.</summary>
<description> Oozie is a system that runs workflows of Hadoop jobs.
Oozie workflows are actions arranged in a control dependency DAG (Directed
Acyclic Graph).
Oozie coordinator functionality allows workflows to be started at regular
intervals and when data becomes available in HDFS.
An Oozie workflow may contain the following types of action nodes:
map-reduce, map-reduce streaming, map-reduce pipes, pig, file-system,
sub-workflows, java, hive, sqoop and ssh (deprecated).
Flow control operations within the workflow can be done using decision,
fork and join nodes. Cycles in workflows are not supported.
Actions and decisions can be parameterized with job properties, action
output (e.g. Hadoop counters) and HDFS file information (file exists,
file size, etc). Formal parameters are expressed in the workflow definition
as ${VARIABLE NAME} variables.
A workflow application is an HDFS directory that contains the workflow
definition (an XML file) and all the files necessary to run its actions:
JAR files for Map/Reduce jobs, shell scripts for streaming Map/Reduce jobs,
native libraries, Pig scripts, and other resource files.
Running workflow jobs is done via command line tools, a WebServices API
or a Java API.
Monitoring the system and workflow jobs can be done via a web console, the
command line tools, the WebServices API and the Java API.
Oozie is a transactional system with built-in automatic and manual
retry capabilities.
In case of workflow job failure, the workflow job can be rerun skipping
previously completed actions, and the workflow application can be patched
before being rerun.</description>
<url>http://oozie.apache.org/</url>
</metadata>
<deps>
<oozie-client>/self</oozie-client>
</deps>
<groups>
<oozie>
<user>oozie</user>
</oozie>
</groups>
<alternatives>
<oozie-conf>
<status>auto</status>
<link>/etc/oozie/conf</link>
<value>/etc/oozie/conf.dist</value>
<alt>/etc/oozie/conf.dist</alt>
</oozie-conf>
</alternatives>
</oozie>
<oozie-client>
<metadata>
<summary>Client for Oozie Workflow Engine</summary>
<description>The Oozie client is a command line utility that allows remote
administration and monitoring of workflows. Using this client
utility you can submit workflows, start/suspend/resume/kill
workflows and find out their status at any time. Apart from
such operations, you can also change the status of the entire
system and get version information. This client utility also allows
you to validate any workflows before they are deployed to the Oozie
server.</description>
<url>http://oozie.apache.org/</url>
</metadata>
<deps>
<hadoop/>
<bigtop-utils/>
</deps>
</oozie-client>
<zookeeper>
<metadata>
<summary>A high-performance coordination service for distributed applications.</summary>
<description>ZooKeeper is a centralized service for maintaining configuration information, naming, providing distributed synchronization, and providing group services. All of these kinds of services are used in some form or another by distributed applications. Each time they are implemented there is a lot of work that goes into fixing the bugs and race conditions that are inevitable. Because of the difficulty of implementing these kinds of services, applications initially usually skimp on them, which makes them brittle in the presence of change and difficult to manage. Even when done correctly, different implementations of these services lead to management complexity when the applications are deployed.
</description>
<url>http://zookeeper.apache.org/</url>
</metadata>
<deps>
<bigtop-utils/>
</deps>
<groups>
<zookeeper>
<user>zookeeper</user>
</zookeeper>
</groups>
<alternatives>
<zookeeper-conf>
<status>auto</status>
<link>/etc/zookeeper/conf</link>
<value>/etc/zookeeper/conf.dist</value>
<alt>/etc/zookeeper/conf.dist</alt>
</zookeeper-conf>
</alternatives>
</zookeeper>
<zookeeper-server>
<metadata>
<summary>The Hadoop ZooKeeper server</summary>
<description>This package starts the ZooKeeper server on startup</description>
<url>http://zookeeper.apache.org/</url>
</metadata>
<deps>
<zookeeper>/self</zookeeper>
</deps>
</zookeeper-server>
<hive-hcatalog>
<metadata>
<summary>Apache HCatalog is a table and storage management service built on top of Hadoop</summary>
<description>Apache HCatalog is a table and storage management service for data created using Apache Hadoop. This includes:
* Providing a shared schema and data type mechanism.
* Providing a table abstraction so that users need not be concerned with where or how their data is stored.
* Providing interoperability across data processing tools such as Pig, Map Reduce, Streaming, and Hive.
</description>
<url>http://hive.apache.org/</url>
</metadata>
<deps>
<hadoop/>
<bigtop-utils/>
<hive/>
</deps>
<alternatives>
<hive-hcatalog-conf>
<status>auto</status>
<value>/etc/hive-hcatalog/conf.dist</value>
<link>/etc/hive-hcatalog/conf</link>
<alt>/etc/hive-hcatalog/conf.dist</alt>
</hive-hcatalog-conf>
</alternatives>
</hive-hcatalog>
<hive-hcatalog-server>
<metadata>
<summary>Init scripts for HCatalog server</summary>
<description>Init scripts for HCatalog server</description>
<url>http://hive.apache.org/</url>
</metadata>
<deps>
<hive-hcatalog>/self</hive-hcatalog>
</deps>
</hive-hcatalog-server>
<hive-jdbc>
<metadata>
<summary>Provides libraries necessary to connect to Apache Hive via JDBC</summary>
<description>This package provides libraries necessary to connect to Apache Hive via JDBC</description>
<url>http://hive.apache.org/</url>
</metadata>
<deps>
<hadoop-client/>
</deps>
</hive-jdbc>
<hive>
<metadata>
<summary>Hive is a data warehouse infrastructure built on top of Hadoop</summary>
<description>Hive is a data warehouse infrastructure built on top of Hadoop that
provides tools to enable easy data summarization, ad hoc querying and
analysis of large datasets stored in Hadoop files. It provides a
mechanism to put structure on this data and it also provides a simple
query language called Hive QL which is based on SQL and which enables
users familiar with SQL to query this data. At the same time, this
language also allows traditional map/reduce programmers to be able to
plug in their custom mappers and reducers to do more sophisticated
analysis which may not be supported by the built-in capabilities of
the language.</description>
<url>http://hive.apache.org/</url>
</metadata>
<deps>
<hadoop/>
<bigtop-utils/>
<hive-jdbc>/self</hive-jdbc>
</deps>
<alternatives>
<hive-conf>
<status>auto</status>
<value>/etc/hive/conf.dist</value>
<link>/etc/hive/conf</link>
<alt>/etc/hive/conf.dist</alt>
</hive-conf>
</alternatives>
</hive>
<hive-metastore>
<metadata>
<summary>Shared metadata repository for Hive.</summary>
<description>This optional package hosts a metadata server for Hive clients across a network to use.</description>
<url>http://hive.apache.org/</url>
</metadata>
<deps>
<hive>/self</hive>
</deps>
<groups>
<hive>
<user>hive</user>
</hive>
</groups>
</hive-metastore>
<hive-server>
<metadata>
<summary>Provides a Hive Thrift service.</summary>
<description>This optional package hosts a Thrift server for Hive clients across a network to use.</description>
<url>http://hive.apache.org/</url>
</metadata>
<deps>
<hive>/self</hive>
</deps>
<groups>
<hive>
<user>hive</user>
</hive>
</groups>
</hive-server>
<hive-hbase>
<metadata>
<summary>Provides integration between Apache HBase and Apache Hive</summary>
<description>This optional package provides integration between Apache HBase and Apache Hive</description>
<url>http://hive.apache.org/</url>
</metadata>
<deps>
<hive>/self</hive>
<hbase/>
</deps>
</hive-hbase>
<hive-webhcat>
<metadata>
<summary>WebHCat provides a REST-like web API for HCatalog and related Hadoop components.</summary>
<description>WebHCat provides a REST-like web API for HCatalog and related Hadoop components.</description>
<url>http://hive.apache.org/</url>
</metadata>
<deps>
<hive-hcatalog>/self</hive-hcatalog>
</deps>
<alternatives>
<hive-webhcat-conf>
<status>auto</status>
<value>/etc/hive-webhcat/conf.dist</value>
<link>/etc/hive-webhcat/conf</link>
<alt>/etc/hive-webhcat/conf.dist</alt>
</hive-webhcat-conf>
</alternatives>
</hive-webhcat>
<hive-webhcat-server>
<metadata>
<summary>Init scripts for the WebHCat server</summary>
<description>Init scripts for the WebHCat server.</description>
<url>http://hive.apache.org/</url>
</metadata>
<deps>
<hive-webhcat>/self</hive-webhcat>
</deps>
</hive-webhcat-server>
<ignite-hadoop>
<metadata>
<summary>Apache Ignite Hadoop Accelerator, providing in-memory caching of HDFS data and MapReduce performance improvements</summary>
<description>Apache Ignite is an open-source, distributed, in-memory computation platform
* HDFS caching and MR performance booster
</description>
<url>https://ignite.incubator.apache.org/</url>
</metadata>
<deps>
<hadoop-hdfs/>
<hadoop-mapreduce/>
<bigtop-utils/>
</deps>
<alternatives>
<ignite-hadoop-conf>
<status>auto</status>
<link>/etc/ignite-hadoop/conf</link>
<value>/etc/ignite-hadoop/conf.dist</value>
<alt>/etc/ignite-hadoop/conf.dist</alt>
</ignite-hadoop-conf>
</alternatives>
</ignite-hadoop>
<ignite-hadoop-service>
<metadata>
<summary>Apache Ignite Hadoop Accelerator platform</summary>
<description>Ignite is an open-source, distributed, in-memory computation platform</description>
<url>https://ignite.incubator.apache.org/</url>
</metadata>
<deps>
<ignite-hadoop/>
</deps>
</ignite-hadoop-service>
<ignite-hadoop-doc>
<metadata>
<summary>Apache Ignite Documentation</summary>
<description>Documentation for the Apache Ignite platform</description>
<url>https://ignite.incubator.apache.org/</url>
</metadata>
</ignite-hadoop-doc>
<hbase>
<metadata>
<summary>HBase is the Hadoop database. Use it when you need random, realtime read/write access to your Big Data. This project's goal is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware.</summary>
<description>HBase is an open-source, distributed, column-oriented store modeled after Google's Bigtable: A Distributed Storage System for Structured Data by Chang et al. Just as Bigtable leverages the distributed data storage provided by the Google File System, HBase provides Bigtable-like capabilities on top of Hadoop. HBase includes:
* Convenient base classes for backing Hadoop MapReduce jobs with HBase tables
* Query predicate push down via server side scan and get filters
* Optimizations for real time queries
* A high performance Thrift gateway
* A REST-ful Web service gateway that supports XML, Protobuf, and binary data encoding options
* Cascading source and sink modules
* Extensible jruby-based (JIRB) shell
* Support for exporting metrics via the Hadoop metrics subsystem to files or Ganglia; or via JMX</description>
<url>http://hbase.apache.org/</url>
</metadata>
<deps>
<zookeeper/>
<hadoop/>
<bigtop-utils/>
</deps>
<alternatives>
<hbase-conf>
<status>auto</status>
<value>/etc/hbase/conf.dist</value>
<link>/etc/hbase/conf</link>
<alt>/etc/hbase/conf.dist</alt>
</hbase-conf>
</alternatives>
<groups>
<hbase>
<user>hbase</user>
</hbase>
</groups>
</hbase>
<hbase-doc>
<metadata>
<summary>HBase Documentation</summary>
<description>Documentation for HBase</description>
<url>http://hbase.apache.org/</url>
</metadata>
</hbase-doc>
<hbase-master>
<metadata>
<summary>The Hadoop HBase master server.</summary>
<description>HMaster is the "master server" for an HBase cluster. There is only one HMaster for a single HBase deployment.</description>
<url>http://hbase.apache.org/</url>
</metadata>
<deps>
<hbase>/self</hbase>
</deps>
</hbase-master>
<hbase-regionserver>
<metadata>
<summary>The Hadoop HBase RegionServer.</summary>
<description>HRegionServer makes a set of HRegions available to clients. It checks in with the HMaster. There are many HRegionServers in a single HBase deployment.</description>
<url>http://hbase.apache.org/</url>
</metadata>
<deps>
<hbase>/self</hbase>
</deps>
</hbase-regionserver>
<hbase-thrift>
<metadata>
<summary>The Hadoop HBase Thrift Interface</summary>
<description>ThriftServer - this class starts up a Thrift server which implements the HBase API specified in the Hbase.thrift IDL file.
"Thrift is a software framework for scalable cross-language services development. It combines a powerful software stack with a code generation engine to build services that work efficiently and seamlessly between C++, Java, Python, PHP, and Ruby." Thrift was originally developed at Facebook and is now a top-level Apache project; for additional information, see http://thrift.apache.org/.</description>
<url>http://hbase.apache.org/</url>
</metadata>
<deps>
<hbase>/self</hbase>
</deps>
</hbase-thrift>
<hbase-rest>
<metadata>
<summary>The Apache HBase REST gateway</summary>
<description>The Apache HBase REST gateway</description>
<url>http://hbase.apache.org/</url>
</metadata>
<deps>
<hbase>/self</hbase>
</deps>
</hbase-rest>
<phoenix>
<metadata>
<summary>Phoenix is a SQL skin over HBase and client-embedded JDBC driver.</summary>
<description>Phoenix is a SQL skin over HBase, delivered as a client-embedded JDBC driver.
The Phoenix query engine transforms an SQL query into one or more HBase scans,
and orchestrates their execution to produce standard JDBC result sets. Direct
use of the HBase API, along with coprocessors and custom filters, results in
performance on the order of milliseconds for small queries, or seconds for
tens of millions of rows. Applications interact with Phoenix through a
standard JDBC interface; all the usual interfaces are supported.</description>
<url>http://phoenix.apache.org</url>
</metadata>
<deps>
<zookeeper/>
<hadoop/>
<hadoop-mapreduce/>
<hadoop-yarn/>
<hbase/>
</deps>
<alternatives>
<phoenix-conf>
<status>auto</status>
<value>/etc/phoenix/conf.dist</value>
<link>/etc/phoenix/conf</link>
<alt>/etc/phoenix/conf.dist</alt>
</phoenix-conf>
</alternatives>
</phoenix>
<hadoop>
<metadata>
<summary>Hadoop is a software platform for processing vast amounts of data</summary>
<description>Hadoop is a software platform that lets one easily write and
run applications that process vast amounts of data.
Here's what makes Hadoop especially useful:
* Scalable: Hadoop can reliably store and process petabytes.
* Economical: It distributes the data and processing across clusters
of commonly available computers. These clusters can number
into the thousands of nodes.
* Efficient: By distributing the data, Hadoop can process it in parallel
on the nodes where the data is located. This makes it
extremely rapid.
* Reliable: Hadoop automatically maintains multiple copies of data and
automatically redeploys computing tasks based on failures.
Hadoop implements MapReduce, using the Hadoop Distributed File System (HDFS).
MapReduce divides applications into many small blocks of work. HDFS creates
multiple replicas of data blocks for reliability, placing them on compute
nodes around the cluster. MapReduce can then process the data where it is
located.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<bigtop-utils/>
</deps>
<alternatives>
<hadoop-conf>
<status>auto</status>
<link>/etc/hadoop/conf</link>
<value>/etc/hadoop/conf.empty</value>
<alt>/etc/hadoop/conf.empty</alt>
</hadoop-conf>
</alternatives>
</hadoop>
<hadoop-hdfs>
<metadata>
<summary>The Hadoop Distributed File System</summary>
<description>Hadoop Distributed File System (HDFS) is the primary storage system used by
Hadoop applications. HDFS creates multiple replicas of data blocks and distributes
them on compute nodes throughout a cluster to enable reliable, extremely rapid
computations.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop>/self</hadoop>
<bigtop-utils/>
</deps>
<groups>
<hdfs>
<user>hdfs</user>
</hdfs>
</groups>
</hadoop-hdfs>
<hadoop-yarn>
<metadata>
<summary>The Hadoop NextGen MapReduce (YARN)</summary>
<description>YARN (Hadoop NextGen MapReduce) is a general purpose data-computation framework.
The fundamental idea of YARN is to split up the two major functionalities of the
JobTracker, resource management and job scheduling/monitoring, into separate daemons:
ResourceManager and NodeManager.
The ResourceManager is the ultimate authority that arbitrates resources among all
the applications in the system. The NodeManager is a per-node slave managing allocation
of computational resources on a single node. Both work in support of the
per-application ApplicationMaster (AM).
An ApplicationMaster is, in effect, a framework-specific library and is tasked with
negotiating resources from the ResourceManager and working with the NodeManager(s) to
execute and monitor the tasks.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop>/self</hadoop>
<bigtop-utils/>
</deps>
<groups>
<yarn>
<user>yarn</user>
</yarn>
</groups>
</hadoop-yarn>
<hadoop-mapreduce>
<metadata>
<summary>The Hadoop MapReduce (MRv2)</summary>
<description>Hadoop MapReduce is a programming model and software framework for writing applications
that rapidly process vast amounts of data in parallel on large clusters of compute nodes.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop-yarn>/self</hadoop-yarn>
<bigtop-utils/>
</deps>
<groups>
<mapred>
<user>mapred</user>
</mapred>
</groups>
</hadoop-mapreduce>
<hadoop-httpfs>
<metadata>
<summary>HttpFS for Hadoop</summary>
<description>The server providing HTTP REST API support for the complete FileSystem/FileContext
interface in HDFS.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop-hdfs>/self</hadoop-hdfs>
<bigtop-utils/>
</deps>
<groups>
<httpfs>
<user>httpfs</user>
</httpfs>
</groups>
</hadoop-httpfs>
<hadoop-hdfs-namenode>
<metadata>
<summary>The Hadoop namenode manages the block locations of HDFS files</summary>
<description>The Hadoop Distributed Filesystem (HDFS) requires one unique server, the
namenode, which manages the block locations of files on the filesystem.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop-hdfs>/self</hadoop-hdfs>
</deps>
</hadoop-hdfs-namenode>
<hadoop-hdfs-zkfc>
<metadata>
<summary>Hadoop HDFS failover controller</summary>
<description>The Hadoop HDFS failover controller is a ZooKeeper client which also
monitors and manages the state of the NameNode. Each of the machines
which runs a NameNode also runs a ZKFC, and that ZKFC is responsible
for: Health monitoring, ZooKeeper session management, ZooKeeper-based
election.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop-hdfs>/self</hadoop-hdfs>
</deps>
</hadoop-hdfs-zkfc>
<hadoop-hdfs-journalnode>
<metadata>
<summary>Hadoop HDFS JournalNode</summary>
<description>The HDFS JournalNode is responsible for persisting NameNode edit logs.
In a typical deployment the JournalNode daemon runs on at least three
separate machines in the cluster.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop-hdfs>/self</hadoop-hdfs>
<hadoop>/self</hadoop>
</deps>
</hadoop-hdfs-journalnode>
<hadoop-hdfs-secondarynamenode>
<metadata>
<summary>Hadoop Secondary namenode</summary>
<description>The Secondary Name Node periodically compacts the Name Node EditLog
into a checkpoint. This compaction ensures that Name Node restarts
do not incur unnecessary downtime.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop-hdfs>/self</hadoop-hdfs>
</deps>
</hadoop-hdfs-secondarynamenode>
<hadoop-hdfs-datanode>
<metadata>
<summary>Hadoop Data Node</summary>
<description>The Data Nodes in the Hadoop Cluster are responsible for serving up
blocks of data over the network to Hadoop Distributed Filesystem
(HDFS) clients.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop-hdfs>/self</hadoop-hdfs>
</deps>
</hadoop-hdfs-datanode>
<hadoop-yarn-resourcemanager>
<metadata>
<summary>YARN Resource Manager</summary>
<description>The resource manager manages the global assignment of compute resources to applications</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop-yarn>/self</hadoop-yarn>
</deps>
</hadoop-yarn-resourcemanager>
<hadoop-yarn-nodemanager>
<metadata>
<summary>YARN Node Manager</summary>
<description>The NodeManager is the per-machine framework agent that is responsible for
containers, monitoring their resource usage (CPU, memory, disk, network) and
reporting the same to the ResourceManager/Scheduler.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop-yarn>/self</hadoop-yarn>
</deps>
</hadoop-yarn-nodemanager>
<hadoop-yarn-proxyserver>
<metadata>
<summary>YARN Web Proxy</summary>
<description>The web proxy server sits in front of the YARN application master web UI.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop-yarn>/self</hadoop-yarn>
</deps>
</hadoop-yarn-proxyserver>
<hadoop-mapreduce-historyserver>
<metadata>
<summary>MapReduce History Server</summary>
<description>The History Server keeps records of the different activities performed on an Apache Hadoop cluster</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop-mapreduce>/self</hadoop-mapreduce>
</deps>
</hadoop-mapreduce-historyserver>
<hadoop-conf-pseudo>
<metadata>
<summary>Pseudo-distributed Hadoop configuration</summary>
<description>Contains configuration files for a "pseudo-distributed" Hadoop deployment.
In this mode, each of the Hadoop components runs as a separate Java process,
but all on the same machine.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop>/self</hadoop>
<hadoop-hdfs-namenode>/self</hadoop-hdfs-namenode>
<hadoop-hdfs-datanode>/self</hadoop-hdfs-datanode>
<hadoop-hdfs-secondarynamenode>/self</hadoop-hdfs-secondarynamenode>
<hadoop-yarn-resourcemanager>/self</hadoop-yarn-resourcemanager>
<hadoop-yarn-nodemanager>/self</hadoop-yarn-nodemanager>
<hadoop-mapreduce-historyserver>/self</hadoop-mapreduce-historyserver>
</deps>
</hadoop-conf-pseudo>
<hadoop-doc>
<metadata>
<summary>Hadoop Documentation</summary>
<description>Documentation for Hadoop</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
</hadoop-doc>
<hadoop-client>
<metadata>
<summary>Hadoop client side dependencies</summary>
<description>Installation of this package will provide you with all the dependencies for Hadoop clients.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop>/self</hadoop>
<hadoop-hdfs>/self</hadoop-hdfs>
<hadoop-yarn>/self</hadoop-yarn>
<hadoop-mapreduce>/self</hadoop-mapreduce>
</deps>
</hadoop-client>
<hadoop-hdfs-fuse>
<metadata>
<summary>Mountable HDFS</summary>
<description>This package allows HDFS to be mounted (on most flavors of Unix) as a standard file system using FUSE.</description>
<url>http://hadoop.apache.org/core/</url>
</metadata>
<deps>
<hadoop-client>/self</hadoop-client>
<hadoop>/self</hadoop>
<hadoop-libhdfs>/self</hadoop-libhdfs>
</deps>
</hadoop-hdfs-fuse>
<hue-common>
<metadata>
<summary>A browser-based desktop interface for Hadoop</summary>
<description>Hue is a browser-based desktop interface for interacting with Hadoop.
It supports a file browser, job tracker interface, cluster health monitor, and more.</description>
<url>http://github.com/cloudera/hue</url>
</metadata>
<deps>
<hue-server>/self</hue-server>
<hue-beeswax>/self</hue-beeswax>
</deps>
<alternatives>
<hue-conf>
<status>auto</status>
<link>/etc/hue/conf</link>
<value>/etc/hue/conf.empty</value>
<alt>/etc/hue/conf.empty</alt>
</hue-conf>
</alternatives>
</hue-common>
<hue-server>
<metadata>
<summary>Service Scripts for Hue</summary>
<description>This package provides the service scripts for the Hue server.</description>
<url>http://github.com/cloudera/hue</url>
</metadata>
<deps>
<hue-common>/self</hue-common>
</deps>
</hue-server>
<hue-doc>
<metadata>
<summary>Documentation for Hue</summary>
<description>This package provides the installation manual, user guide, SDK documentation, and release notes.</description>
<url>http://github.com/cloudera/hue</url>
</metadata>
<deps>
</deps>
<alternatives>
</alternatives>
</hue-doc>
<hue-beeswax>
<metadata>
<summary>A UI for Hive on Hue</summary>
<description>Beeswax is a web interface for Hive.
It allows users to construct and run queries on Hive, manage tables,
and import and export data.</description>
<url>http://github.com/cloudera/hue</url>
</metadata>
<deps>
<hue-common>/self</hue-common>
<hive/>
<make/>
</deps>
</hue-beeswax>
<hue-pig>
<metadata>
<summary>A UI for Pig on Hue</summary>
<description>A web interface for Pig.
It allows users to construct and run Pig jobs.</description>
<url>http://github.com/cloudera/hue</url>
</metadata>
<deps>
<hue-common>/self</hue-common>
<make/>
<pig/>
</deps>
</hue-pig>
<hue>
<metadata>
<summary>The Hue metapackage</summary>
<description>Hue is a browser-based desktop interface for interacting with Hadoop. It supports a file browser, job tracker interface, cluster health monitor, and more.</description>
<url>http://github.com/cloudera/hue</url>
</metadata>
<deps>
<hue-server>/self</hue-server>
<hue-beeswax>/self</hue-beeswax>
</deps>
</hue>
</packages>