<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml -->
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
    this work for additional information regarding copyright ownership.
    The ASF licenses this file to You under the Apache License, Version 2.0
    (the "License"); you may not use this file except in compliance with
    the License.  You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
 -->
 <packages>
   <bigtop-utils>
     <metadata>
       <summary>Collection of useful tools for Bigtop</summary>
       <description>This includes a collection of useful tools and files for Bigtop</description>
       <url>http://bigtop.apache.org/</url>
     </metadata>
   </bigtop-utils>
   <bigtop-jsvc>
     <metadata>
       <summary>Application to launch java daemon</summary>
       <description>jsvc executes classfile that implements a Daemon interface.</description>
       <url>http://commons.apache.org/daemon/</url>
     </metadata>
   </bigtop-jsvc>

   <bigtop-groovy>
     <metadata>
       <summary>An agile and dynamic language for the Java Virtual Machine</summary>
       <description>Groovy provides a JVM based runtime environment for function programming and scripting.</description>
       <url>http://groovy.codehaus.org/</url>
     </metadata>
     <deps>
       <bigtop-utils/>
     </deps>
   </bigtop-groovy>

   <bigtop-tomcat>
     <metadata>
       <summary>Apache Tomcat</summary>
       <description>Apache Tomcat is an open source software implementation of the
 Java Servlet and JavaServer Pages technologies.</description>
       <url>http://tomcat.apache.org/</url>
     </metadata>
   </bigtop-tomcat>
   <spark-core>
     <metadata>
       <summary>Lightning-Fast Cluster Computing</summary>
       <description>Spark is a MapReduce-like cluster computing framework designed to support
 low-latency iterative jobs and interactive use from an interpreter. It is
 written in Scala, a high-level language for the JVM, and exposes a clean
 language-integrated syntax that makes it easy to write parallel jobs.
 Spark runs on top of the Apache Mesos cluster manager.</description>
       <url>http://spark.apache.org/</url>
     </metadata>
     <deps>
       <bigtop-utils/>
     </deps>
     <alternatives>
       <spark-conf>
         <status>auto</status>
         <link>/etc/spark/conf</link>
         <value>/etc/spark/conf.dist</value>
         <alt>/etc/spark/conf.dist</alt>
       </spark-conf>
     </alternatives>
   </spark-core>
   <spark-master>
     <!-- NOTE(review): the dependency below names a package called "spark", but
          the only Spark base package record visible nearby is "spark-core".
          Confirm which package name spark-master really requires. -->
     <metadata>
       <summary>Server for Spark master</summary>
       <description>Server for Spark master</description>
       <url>http://spark.apache.org/</url>
     </metadata>
     <deps>
       <spark>/self</spark>
     </deps>
   </spark-master>
   <spark-worker>
     <!-- NOTE(review): the dependency below names a package called "spark", but
          the only Spark base package record visible nearby is "spark-core".
          Confirm which package name spark-worker really requires. -->
     <metadata>
       <summary>Server for Spark worker</summary>
       <description>Server for Spark worker</description>
       <url>http://spark.apache.org/</url>
     </metadata>
     <deps>
       <spark>/self</spark>
     </deps>
   </spark-worker>
   <spark-python>
     <!-- NOTE(review): the dependency below names a package called "spark", but
          the only Spark base package record visible nearby is "spark-core".
          Confirm which package name spark-python really requires. -->
     <metadata>
       <summary>Python client for Spark</summary>
       <description>Includes PySpark, an interactive Python shell for Spark, and related libraries</description>
       <url>http://spark.apache.org/</url>
     </metadata>
     <deps>
       <spark>/self</spark>
     </deps>
   </spark-python>
   <flume>
     <metadata>
       <summary>Flume is a reliable, scalable, and manageable distributed log collection application for collecting data such as logs and delivering it to data stores such as Hadoop's HDFS.</summary>
       <description>Flume is a reliable, scalable, and manageable distributed data collection
  application for collecting data such as logs and delivering it to data stores
  such as Hadoop's HDFS.  It can efficiently collect, aggregate, and move large
  amounts of log data.  It has a simple, but flexible, architecture based on
  streaming data flows.  It is robust and fault tolerant with tunable reliability
  mechanisms and many failover and recovery mechanisms.  The system is centrally
  managed and allows for intelligent dynamic management.  It uses a simple
  extensible data model that allows for online analytic applications.</description>
       <url>http://incubator.apache.org/projects/flume.html</url>
     </metadata>
     <deps>
       <zookeeper/>
       <hadoop/>
       <bigtop-utils/>
     </deps>
     <groups>
       <flume>
         <user>flume</user>
       </flume>
     </groups>
     <alternatives>
       <flume-conf>
         <status>auto</status>
         <link>/etc/flume/conf</link>
         <value>/etc/flume/conf.empty</value>
         <alt>/etc/flume/conf.empty</alt>
       </flume-conf>
     </alternatives>
   </flume>
   <flume-agent>
     <metadata>
       <summary>The flume agent daemon is a core element of flume's data path and is responsible for generating, processing, and delivering data.</summary>
       <description>Flume is a reliable, scalable, and manageable distributed data collection application for collecting data such as logs and delivering it to data stores such as Hadoop's HDFS. It can efficiently collect, aggregate, and move large amounts of log data. It has a simple, but flexible, architecture based on streaming data flows. It is robust and fault tolerant with tunable reliability mechanisms and many failover and recovery mechanisms. The system is centrally managed and allows for intelligent dynamic management. It uses a simple extensible data model that allows for online analytic applications.</description>
       <url>http://incubator.apache.org/projects/flume.html</url>
     </metadata>
     <deps>
       <flume>/self</flume>
     </deps>
   </flume-agent>
   <solr>
     <metadata>
       <summary>Apache Solr is the popular, blazing fast open source enterprise search platform</summary>
       <description>Solr is the popular, blazing fast open source enterprise search platform from
 the Apache Lucene project. Its major features include powerful full-text
 search, hit highlighting, faceted search, dynamic clustering, database
 integration, rich document (e.g., Word, PDF) handling, and geospatial search.
 Solr is highly scalable, providing distributed search and index replication,
 and it powers the search and navigation features of many of the world's
 largest internet sites.

 Solr is written in Java and runs as a standalone full-text search server within
 a servlet container such as Tomcat. Solr uses the Lucene Java search library at
 its core for full-text indexing and search, and has REST-like HTTP/XML and JSON
 APIs that make it easy to use from virtually any programming language. Solr's
 powerful external configuration allows it to be tailored to almost any type of
 application without Java coding, and it has an extensive plugin architecture
 when more advanced customization is required.</description>
       <url>http://lucene.apache.org/solr</url>
     </metadata>
     <deps>
       <bigtop-tomcat/>
       <bigtop-utils/>
     </deps>
     <alternatives>
     </alternatives>
   </solr>
   <solr-doc>
     <metadata>
       <summary>Documentation for Apache Solr</summary>
       <description>Documentation for Apache Solr</description>
       <url>http://lucene.apache.org/solr</url>
     </metadata>
     <deps>
     </deps>
     <alternatives>
     </alternatives>
   </solr-doc>
   <solr-server>
     <metadata>
       <summary>The Solr server</summary>
       <description>This package starts the Solr server on startup</description>
       <url>http://lucene.apache.org/solr</url>
     </metadata>
     <deps>
     </deps>
     <alternatives>
     </alternatives>
   </solr-server>
   <sqoop>
     <metadata>
       <summary>Tool for easy imports and exports of data sets between databases and the Hadoop ecosystem</summary>
       <description>Sqoop is a tool that provides the ability to import and export data sets between the Hadoop Distributed File System (HDFS) and relational databases.</description>
       <url>http://sqoop.apache.org</url>
     </metadata>
     <deps>
       <bigtop-tomcat/>
       <bigtop-utils/>
       <hadoop-client/>
       <sqoop-client>/self</sqoop-client>
     </deps>
     <alternatives>
       <sqoop-conf>
         <status>auto</status>
         <link>/etc/sqoop/conf</link>
         <value>/etc/sqoop/conf.dist</value>
         <alt>/etc/sqoop/conf.dist</alt>
       </sqoop-conf>
     </alternatives>
     <groups>
       <sqoop>
         <user>sqoop</user>
       </sqoop>
     </groups>
   </sqoop>
   <sqoop-server>
     <metadata>
       <summary>Server for Sqoop.</summary>
       <description>Centralized server for Sqoop.</description>
       <url>http://sqoop.apache.org</url>
     </metadata>
     <deps>
       <sqoop>/self</sqoop>
     </deps>
   </sqoop-server>
   <sqoop-client>
     <metadata>
       <summary>Client for Sqoop.</summary>
       <description>Lightweight client for Sqoop.</description>
       <url>http://sqoop.apache.org</url>
     </metadata>
   </sqoop-client>
   <oozie>
     <metadata>
       <summary>Oozie is a system that runs workflows of Hadoop jobs.</summary>
       <description> Oozie is a system that runs workflows of Hadoop jobs.
  Oozie workflows are actions arranged in a control dependency DAG (Direct
  Acyclic Graph).

  Oozie coordinator functionality allows to start workflows at regular
  frequencies and when data becomes available in HDFS.

  An Oozie workflow may contain the following types of actions nodes:
  map-reduce, map-reduce streaming, map-reduce pipes, pig, file-system,
  sub-workflows, java, hive, sqoop and ssh (deprecated).

  Flow control operations within the workflow can be done using decision,
  fork and join nodes. Cycles in workflows are not supported.

  Actions and decisions can be parameterized with job properties, actions
  output (i.e. Hadoop counters) and HDFS  file information (file exists,
  file size, etc). Formal parameters are expressed in the workflow definition
  as ${VARIABLE NAME} variables.

  A Workflow application is an HDFS directory that contains the workflow
  definition (an XML file), all the necessary files to run all the actions:
  JAR files for Map/Reduce jobs, shells for streaming Map/Reduce jobs, native
  libraries, Pig scripts, and other resource files.

  Running workflow jobs is done via command line tools, a WebServices API
  or a Java API.

  Monitoring the system and workflow jobs can be done via a web console, the
  command line tools, the WebServices API and the Java API.

  Oozie is a transactional system and it has built in automatic and manual
  retry capabilities.

  In case of workflow job failure, the workflow job can be rerun skipping
  previously completed actions, the workflow application can be patched before
  being rerun.</description>
       <url>http://oozie.apache.org/</url>
     </metadata>
     <deps>
       <oozie-client>/self</oozie-client>
     </deps>
     <groups>
       <oozie>
         <user>oozie</user>
       </oozie>
     </groups>
     <alternatives>
       <oozie-conf>
         <status>auto</status>
         <link>/etc/oozie/conf</link>
         <value>/etc/oozie/conf.dist</value>
         <alt>/etc/oozie/conf.dist</alt>
       </oozie-conf>
     </alternatives>
   </oozie>
   <oozie-client>
     <metadata>
       <summary>Client for Oozie Workflow Engine</summary>
       <description>Oozie client is a command line client utility that allows remote
 administration and monitoring of worflows. Using this client
 utility you can submit worflows, start/suspend/resume/kill
 workflows and find out their status at any instance. Apart from
 such operations, you can also change the status of the entire
 system, get vesion information. This client utility also allows
 you to validate any worflows before they are deployed to the Oozie
 server.</description>
       <url>http://oozie.apache.org/</url>
     </metadata>
     <deps>
       <hadoop/>
       <bigtop-utils/>
     </deps>
   </oozie-client>
   <zookeeper>
     <metadata>
       <summary>A high-performance coordination service for distributed applications.</summary>
       <description>ZooKeeper is a centralized service for maintaining configuration information, naming, providing distributed synchronization, and providing group services.  All of these kinds of services are used in some form or another by distributed applications. Each time they are implemented there is a lot of work that goes into fixing the bugs and race conditions that are inevitable. Because of the difficulty of implementing these kinds of services, applications initially usually skimp on them ,which make them brittle in the presence of change and difficult to manage. Even when done correctly, different implementations of these services lead to management complexity when the applications are deployed.
     </description>
       <url>http://zookeeper.apache.org/</url>
     </metadata>
     <deps>
       <bigtop-utils/>
     </deps>
     <groups>
       <zookeeper>
         <user>zookeeper</user>
       </zookeeper>
     </groups>
     <alternatives>
       <zookeeper-conf>
         <status>auto</status>
         <link>/etc/zookeeper/conf</link>
         <value>/etc/zookeeper/conf.dist</value>
         <alt>/etc/zookeeper/conf.dist</alt>
       </zookeeper-conf>
     </alternatives>
   </zookeeper>
   <zookeeper-server>
     <metadata>
       <summary>The Hadoop Zookeeper server</summary>
       <description>This package starts the zookeeper server on startup</description>
       <url>http://zookeeper.apache.org/</url>
     </metadata>
     <deps>
       <zookeeper>/self</zookeeper>
     </deps>
   </zookeeper-server>
   <hive-hcatalog>
     <metadata>
       <summary>Apache Hcatalog is a data warehouse infrastructure built on top of Hadoop</summary>
       <description>Apache HCatalog is a table and storage management service for data created using Apache Hadoop. This includes: * Providing a shared schema and data type mechanism. * Providing a table abstraction so that users need not be concerned with where or how their data is stored. * Providing interoperability across data processing tools such as Pig, Map Reduce, Streaming, and Hive.
       </description>
       <url>http://hive.apache.org/</url>
     </metadata>
     <deps>
       <hadoop/>
       <bigtop-utils/>
       <hive/>
     </deps>
     <alternatives>
       <hive-hcatalog-conf>
         <status>auto</status>
         <value>/etc/hive-hcatalog/conf.dist</value>
         <link>/etc/hive-hcatalog/conf</link>
         <alt>/etc/hive-hcatalog/conf.dist</alt>
       </hive-hcatalog-conf>
     </alternatives>
   </hive-hcatalog>
   <hive-hcatalog-server>
     <metadata>
       <summary>Init scripts for HCatalog server</summary>
       <description>Init scripts for HCatalog server</description>
       <url>http://hive.apache.org/</url>
     </metadata>
     <deps>
       <hive-hcatalog>/self</hive-hcatalog>
     </deps>
   </hive-hcatalog-server>
   <hive-jdbc>
     <metadata>
       <summary>Provides libraries necessary to connect to Apache Hive via JDBC</summary>
       <description>This package provides libraries necessary to connect to Apache Hive via JDBC</description>
       <url>http://hive.apache.org/</url>
     </metadata>
     <deps>
       <hadoop-client/>
     </deps>
   </hive-jdbc>
   <hive>
     <metadata>
       <summary>Hive is a data warehouse infrastructure built on top of Hadoop</summary>
       <description>Hive is a data warehouse infrastructure built on top of Hadoop that
  provides tools to enable easy data summarization, adhoc querying and
  analysis of large datasets data stored in Hadoop files. It provides a
  mechanism to put structure on this data and it also provides a simple
  query language called Hive QL which is based on SQL and which enables
  users familiar with SQL to query this data. At the same time, this
  language also allows traditional map/reduce programmers to be able to
  plug in their custom mappers and reducers to do more sophisticated
  analysis which may not be supported by the built-in capabilities of
  the language.</description>
       <url>http://hive.apache.org/</url>
     </metadata>
     <deps>
       <hadoop/>
       <bigtop-utils/>
       <hive-jdbc>/self</hive-jdbc>
     </deps>
     <alternatives>
       <hive-conf>
         <status>auto</status>
         <value>/etc/hive/conf.dist</value>
         <link>/etc/hive/conf</link>
         <alt>/etc/hive/conf.dist</alt>
       </hive-conf>
     </alternatives>
   </hive>
   <hive-metastore>
     <metadata>
       <summary>Shared metadata repository for Hive.</summary>
       <description>This optional package hosts a metadata server for Hive clients across a network to use.</description>
       <url>http://hive.apache.org/</url>
     </metadata>
     <deps>
       <hive>/self</hive>
     </deps>
     <groups>
       <hive>
         <user>hive</user>
       </hive>
     </groups>
   </hive-metastore>
   <hive-server>
     <metadata>
       <summary>Provides a Hive Thrift service.</summary>
       <description>This optional package hosts a Thrift server for Hive clients across a network to use.</description>
       <url>http://hive.apache.org/</url>
     </metadata>
     <deps>
       <hive>/self</hive>
     </deps>
     <groups>
       <hive>
         <user>hive</user>
       </hive>
     </groups>
   </hive-server>
   <hive-hbase>
     <metadata>
       <summary>Provides integration between Apache HBase and Apache Hive</summary>
       <description>This optional package provides integration between Apache HBase and Apache Hive</description>
       <url>http://hive.apache.org/</url>
     </metadata>
     <deps>
       <hive>/self</hive>
       <hbase/>
     </deps>
   </hive-hbase>
   <hive-webhcat>
     <metadata>
       <summary>WebHcat provides a REST-like web API for HCatalog and related Hadoop components.</summary>
       <description>WebHcat provides a REST-like web API for HCatalog and related Hadoop components.</description>
       <url>http://hive.apache.org/</url>
     </metadata>
     <deps>
       <hive-hcatalog>/self</hive-hcatalog>
     </deps>
     <alternatives>
       <hive-webhcat-conf>
         <status>auto</status>
         <value>/etc/hive-webhcat/conf.dist</value>
         <link>/etc/hive-webhcat/conf</link>
         <alt>/etc/hive-webhcat/conf.dist</alt>
       </hive-webhcat-conf>
     </alternatives>
   </hive-webhcat>
   <hive-webhcat-server>
     <metadata>
       <summary>Init scripts for WebHcat server</summary>
       <description>Init scripts for WebHcat server.</description>
       <url>http://hive.apache.org/</url>
     </metadata>
     <deps>
       <hive-webhcat>/self</hive-webhcat>
     </deps>
   </hive-webhcat-server>
   <ignite-hadoop>
     <metadata>
       <summary>Apache Ignite Hadoop accelerator. The system provides for in-memory caching of HDFS data and MR performance improvements</summary>
       <description>Apache Ignite is an open-source, distributed, in-memory computation platform

         * HDFS caching and MR performance booster

       </description>
       <url>https://ignite.incubator.apache.org/</url>
     </metadata>
     <deps>
       <hadoop-hdfs/>
       <hadoop-mapreduce/>
       <bigtop-utils/>
     </deps>
     <alternatives>
       <ignite-hadoop-conf>
         <status>auto</status>
         <link>/etc/ignite-hadoop/conf</link>
         <value>/etc/ignite-hadoop/conf.dist</value>
         <alt>/etc/ignite-hadoop/conf.dist</alt>
       </ignite-hadoop-conf>
     </alternatives>
   </ignite-hadoop>
     <ignite-hadoop-service>
       <metadata>
         <summary>Apache Ignite Hadoop Accelerator platform</summary>
         <description>Ignite is an open-source, distributed, in-memory computation platform</description>
         <url>https://ignite.incubator.apache.org/</url>
       </metadata>
       <deps>
         <ignite-hadoop/>
       </deps>
     </ignite-hadoop-service>
     <ignite-hadoop-doc>
       <metadata>
         <summary>Ignite Documentation</summary>
         <description>Documentation for Apache Ignite platform</description>
         <url>https://ignite.incubator.apache.org/</url>
       </metadata>
     </ignite-hadoop-doc>
     <hbase>
     <metadata>
       <summary>HBase is the Hadoop database. Use it when you need random, realtime read/write access to your Big Data. This project's goal is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware.</summary>
       <description>HBase is an open-source, distributed, column-oriented store modeled after Google' Bigtable: A Distributed Storage System for Structured Data by Chang et al. Just as Bigtable leverages the distributed data storage provided by the Google File System, HBase provides Bigtable-like capabilities on top of Hadoop. HBase includes:

     * Convenient base classes for backing Hadoop MapReduce jobs with HBase tables
     * Query predicate push down via server side scan and get filters
     * Optimizations for real time queries
     * A high performance Thrift gateway
     * A REST-ful Web service gateway that supports XML, Protobuf, and binary data encoding options
     * Cascading source and sink modules
     * Extensible jruby-based (JIRB) shell
     * Support for exporting metrics via the Hadoop metrics subsystem to files or Ganglia; or via JMX</description>
       <url>http://hbase.apache.org/</url>
     </metadata>
     <deps>
       <zookeeper/>
       <hadoop/>
       <bigtop-utils/>
     </deps>
     <alternatives>
       <hbase-conf>
         <status>auto</status>
         <value>/etc/hbase/conf.dist</value>
         <link>/etc/hbase/conf</link>
         <alt>/etc/hbase/conf.dist</alt>
       </hbase-conf>
     </alternatives>
     <groups>
       <hbase>
         <user>hbase</user>
       </hbase>
     </groups>
   </hbase>
   <hbase-doc>
     <metadata>
       <summary>Hbase Documentation</summary>
       <description>Documentation for Hbase</description>
       <url>http://hbase.apache.org/</url>
     </metadata>
   </hbase-doc>
   <hbase-master>
     <metadata>
       <summary>The Hadoop HBase master Server.</summary>
       <description>HMaster is the "master server" for a HBase. There is only one HMaster for a single HBase deployment.</description>
       <url>http://hbase.apache.org/</url>
     </metadata>
     <deps>
       <hbase>/self</hbase>
     </deps>
   </hbase-master>
   <hbase-regionserver>
     <metadata>
       <summary>The Hadoop HBase RegionServer server.</summary>
       <description>HRegionServer makes a set of HRegions available to clients. It checks in with the HMaster. There are many HRegionServers in a single HBase deployment.</description>
       <url>http://hbase.apache.org/</url>
     </metadata>
     <deps>
       <hbase>/self</hbase>
     </deps>
   </hbase-regionserver>
   <hbase-thrift>
     <metadata>
       <summary>The Hadoop HBase Thrift Interface</summary>
       <description>ThriftServer - this class starts up a Thrift server which implements the Hbase API specified in the Hbase.thrift IDL file.
 "Thrift is a software framework for scalable cross-language services development. It combines a powerful software stack with a code generation engine to build services that work efficiently and seamlessly between C++, Java, Python, PHP, and Ruby. Thrift was developed at Facebook, and we are now releasing it as open source." For additional information, see http://developers.facebook.com/thrift/. Facebook has announced their intent to migrate Thrift into Apache Incubator.</description>
       <url>http://hbase.apache.org/</url>
     </metadata>
     <deps>
       <hbase>/self</hbase>
     </deps>
   </hbase-thrift>
   <hbase-rest>
     <metadata>
       <summary>The Apache HBase REST gateway</summary>
       <description>The Apache HBase REST gateway</description>
       <url>http://hbase.apache.org/</url>
     </metadata>
     <deps>
       <hbase>/self</hbase>
     </deps>
   </hbase-rest>
   <phoenix>
     <metadata>
       <summary>Phoenix is a SQL skin over HBase and client-embedded JDBC driver.</summary>
       <description>Phoenix is a SQL skin over HBase, delivered as a client-embedded JDBC driver.
 The Phoenix query engine transforms an SQL query into one or more HBase scans,
 and orchestrates their execution to produce standard JDBC result sets. Direct
 use of the HBase API, along with coprocessors and custom filters, results in
 performance on the order of milliseconds for small queries, or seconds for
 tens of millions of rows. Applications interact with Phoenix through a
 standard JDBC interface; all the usual interfaces are supported.</description>
       <url>http://phoenix.apache.org</url>
     </metadata>
     <deps>
       <zookeeper/>
       <hadoop/>
       <hadoop-mapreduce/>
       <hadoop-yarn/>
       <hbase/>
     </deps>
     <alternatives>
       <phoenix-conf>
         <status>auto</status>
         <value>/etc/phoenix/conf.dist</value>
         <link>/etc/phoenix/conf</link>
         <alt>/etc/phoenix/conf.dist</alt>
       </phoenix-conf>
     </alternatives>
   </phoenix>
   <hadoop>
     <metadata>
       <summary>Hadoop is a software platform for processing vast amounts of data</summary>
       <description>Hadoop is a software platform that lets one easily write and
 run applications that process vast amounts of data.

 Here's what makes Hadoop especially useful:
 * Scalable: Hadoop can reliably store and process petabytes.
 * Economical: It distributes the data and processing across clusters
               of commonly available computers. These clusters can number
               into the thousands of nodes.
 * Efficient: By distributing the data, Hadoop can process it in parallel
              on the nodes where the data is located. This makes it
              extremely rapid.
 * Reliable: Hadoop automatically maintains multiple copies of data and
             automatically redeploys computing tasks based on failures.

 Hadoop implements MapReduce, using the Hadoop Distributed File System (HDFS).
 MapReduce divides applications into many small blocks of work. HDFS creates
 multiple replicas of data blocks for reliability, placing them on compute
 nodes around the cluster. MapReduce can then process the data where it is
 located.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <bigtop-utils/>
     </deps>
     <alternatives>
       <hadoop-conf>
         <status>auto</status>
         <link>/etc/hadoop/conf</link>
         <value>/etc/hadoop/conf.empty</value>
         <alt>/etc/hadoop/conf.empty</alt>
       </hadoop-conf>
     </alternatives>
   </hadoop>
   <hadoop-hdfs>
     <metadata>
       <summary>The Hadoop Distributed File System</summary>
       <description>Hadoop Distributed File System (HDFS) is the primary storage system used by
 Hadoop applications. HDFS creates multiple replicas of data blocks and distributes
 them on compute nodes throughout a cluster to enable reliable, extremely rapid
 computations.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop>/self</hadoop>
       <bigtop-utils/>
     </deps>
     <groups>
       <hdfs>
         <user>hdfs</user>
       </hdfs>
     </groups>
   </hadoop-hdfs>
   <hadoop-yarn>
     <metadata>
       <summary>The Hadoop NextGen MapReduce (YARN)</summary>
       <description>YARN (Hadoop NextGen MapReduce) is a general purpose data-computation framework.
 The fundamental idea of YARN is to split up the two major functionalities of the
 JobTracker, resource management and job scheduling/monitoring, into separate daemons:
 ResourceManager and NodeManager.

 The ResourceManager is the ultimate authority that arbitrates resources among all
 the applications in the system. The NodeManager is a per-node slave managing allocation
 of computational resources on a single node. Both work in support of per-application
 ApplicationMaster (AM).

 An ApplicationMaster is, in effect, a framework specific library and is tasked with
 negotiating resources from the ResourceManager and working with the NodeManager(s) to
 execute and monitor the tasks.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop>/self</hadoop>
       <bigtop-utils/>
     </deps>
     <groups>
       <yarn>
         <user>yarn</user>
       </yarn>
     </groups>
   </hadoop-yarn>
   <hadoop-mapreduce>
     <metadata>
       <summary>The Hadoop MapReduce (MRv2)</summary>
       <description>Hadoop MapReduce is a programming model and software framework for writing applications
 that rapidly process vast amounts of data in parallel on large clusters of compute nodes.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop-yarn>/self</hadoop-yarn>
       <bigtop-utils/>
     </deps>
     <groups>
       <mapred>
         <user>mapred</user>
       </mapred>
     </groups>
   </hadoop-mapreduce>
   <hadoop-httpfs>
     <metadata>
       <summary>HTTPFS for Hadoop</summary>
       <description>The server providing HTTP REST API support for the complete FileSystem/FileContext
 interface in HDFS.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop-hdfs>/self</hadoop-hdfs>
       <bigtop-utils/>
     </deps>
     <groups>
       <httpfs>
         <user>httpfs</user>
       </httpfs>
     </groups>
   </hadoop-httpfs>
   <hadoop-hdfs-namenode>
     <metadata>
       <summary>The Hadoop namenode manages the block locations of HDFS files</summary>
       <description>The Hadoop Distributed Filesystem (HDFS) requires one unique server, the
 namenode, which manages the block locations of files on the filesystem.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop-hdfs>/self</hadoop-hdfs>
     </deps>
   </hadoop-hdfs-namenode>
   <hadoop-hdfs-zkfc>
     <metadata>
       <summary>Hadoop HDFS failover controller</summary>
       <description>The Hadoop HDFS failover controller is a ZooKeeper client which also
 monitors and manages the state of the NameNode. Each of the machines
 which runs a NameNode also runs a ZKFC, and that ZKFC is responsible
 for: Health monitoring, ZooKeeper session management, ZooKeeper-based
 election.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop-hdfs>/self</hadoop-hdfs>
     </deps>
   </hadoop-hdfs-zkfc>
   <hadoop-hdfs-journalnode>
     <metadata>
       <summary>Hadoop HDFS JournalNode</summary>
       <description>The HDFS JournalNode is responsible for persisting NameNode edit logs.
 In a typical deployment the JournalNode daemon runs on at least three
 separate machines in the cluster.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop-hdfs>/self</hadoop-hdfs>
       <hadoop>/self</hadoop>
     </deps>
   </hadoop-hdfs-journalnode>
   <!-- Entry for the hadoop-hdfs-secondarynamenode package: metadata plus a single
        declared dependency on hadoop-hdfs.
        The description contains a double space after "checkpoint." and embedded line
        breaks; both are part of the text node and should be left untouched.
        NOTE(review): "/self" presumably means "same version as this package"; confirm. -->
   <hadoop-hdfs-secondarynamenode>
     <metadata>
       <summary>Hadoop Secondary namenode</summary>
       <description>The Secondary Name Node periodically compacts the Name Node EditLog
 into a checkpoint.  This compaction ensures that Name Node restarts
 do not incur unnecessary downtime.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop-hdfs>/self</hadoop-hdfs>
     </deps>
   </hadoop-hdfs-secondarynamenode>
   <!-- Entry for the hadoop-hdfs-datanode package: metadata plus a single declared
        dependency on hadoop-hdfs.
        The description is multi-line; its whitespace is part of the text node.
        NOTE(review): "/self" presumably means "same version as this package"; confirm. -->
   <hadoop-hdfs-datanode>
     <metadata>
       <summary>Hadoop Data Node</summary>
       <description>The Data Nodes in the Hadoop Cluster are responsible for serving up
 blocks of data over the network to Hadoop Distributed Filesystem
 (HDFS) clients.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop-hdfs>/self</hadoop-hdfs>
     </deps>
   </hadoop-hdfs-datanode>
   <!-- Entry for the hadoop-yarn-resourcemanager package: metadata plus a single
        declared dependency on hadoop-yarn.
        NOTE(review): "/self" presumably means "same version as this package"; confirm. -->
   <hadoop-yarn-resourcemanager>
     <metadata>
       <summary>YARN Resource Manager</summary>
       <description>The resource manager manages the global assignment of compute resources to applications</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop-yarn>/self</hadoop-yarn>
     </deps>
   </hadoop-yarn-resourcemanager>
   <!-- Entry for the hadoop-yarn-nodemanager package: metadata plus a single declared
        dependency on hadoop-yarn.
        The description is multi-line; its whitespace is part of the text node.
        NOTE(review): "/self" presumably means "same version as this package"; confirm. -->
   <hadoop-yarn-nodemanager>
     <metadata>
       <summary>YARN Node Manager</summary>
       <description>The NodeManager is the per-machine framework agent who is responsible for
 containers, monitoring their resource usage (cpu, memory, disk, network) and
 reporting the same to the ResourceManager/Scheduler.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop-yarn>/self</hadoop-yarn>
     </deps>
   </hadoop-yarn-nodemanager>
   <!-- Entry for the hadoop-yarn-proxyserver package: metadata plus a single declared
        dependency on hadoop-yarn.
        NOTE(review): "/self" presumably means "same version as this package"; confirm. -->
   <hadoop-yarn-proxyserver>
     <metadata>
       <summary>YARN Web Proxy</summary>
       <description>The web proxy server sits in front of the YARN application master web UI.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop-yarn>/self</hadoop-yarn>
     </deps>
   </hadoop-yarn-proxyserver>
   <!-- Entry for the hadoop-mapreduce-historyserver package: metadata plus a single
        declared dependency on hadoop-mapreduce.
        NOTE(review): the description reads "a Apache Hadoop cluster"; this presumably
        mirrors the text shipped in the package itself, so confirm against the package
        before correcting the grammar here.
        NOTE(review): "/self" presumably means "same version as this package"; confirm. -->
   <hadoop-mapreduce-historyserver>
     <metadata>
       <summary>MapReduce History Server</summary>
       <description>The History server keeps records of the different activities being performed on a Apache Hadoop cluster</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop-mapreduce>/self</hadoop-mapreduce>
     </deps>
   </hadoop-mapreduce-historyserver>
   <!-- Entry for the hadoop-conf-pseudo package (pseudo-distributed configuration).
        Declares seven dependencies: hadoop, the HDFS namenode, datanode and
        secondarynamenode packages, the YARN resourcemanager and nodemanager packages,
        and the MapReduce historyserver package.
        The description is multi-line; its whitespace is part of the text node.
        NOTE(review): "/self" presumably means "same version as this package"; confirm. -->
   <hadoop-conf-pseudo>
     <metadata>
       <summary>Pseudo-distributed Hadoop configuration</summary>
       <description>Contains configuration files for a "pseudo-distributed" Hadoop deployment.
 In this mode, each of the hadoop components runs as a separate Java process,
 but all on the same machine.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop>/self</hadoop>
       <hadoop-hdfs-namenode>/self</hadoop-hdfs-namenode>
       <hadoop-hdfs-datanode>/self</hadoop-hdfs-datanode>
       <hadoop-hdfs-secondarynamenode>/self</hadoop-hdfs-secondarynamenode>
       <hadoop-yarn-resourcemanager>/self</hadoop-yarn-resourcemanager>
       <hadoop-yarn-nodemanager>/self</hadoop-yarn-nodemanager>
       <hadoop-mapreduce-historyserver>/self</hadoop-mapreduce-historyserver>
     </deps>
   </hadoop-conf-pseudo>
   <!-- Entry for the hadoop-doc package: metadata only. No deps element is present,
        so this entry declares no dependencies. -->
   <hadoop-doc>
     <metadata>
       <summary>Hadoop Documentation</summary>
       <description>Documentation for Hadoop</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
   </hadoop-doc>
   <!-- Entry for the hadoop-client package: metadata plus four declared dependencies
        (hadoop, hadoop-hdfs, hadoop-yarn, hadoop-mapreduce).
        NOTE(review): "/self" presumably means "same version as this package"; confirm. -->
   <hadoop-client>
     <metadata>
       <summary>Hadoop client side dependencies</summary>
       <description>Installation of this package will provide you with all the dependencies for Hadoop clients.</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop>/self</hadoop>
       <hadoop-hdfs>/self</hadoop-hdfs>
       <hadoop-yarn>/self</hadoop-yarn>
       <hadoop-mapreduce>/self</hadoop-mapreduce>
     </deps>
   </hadoop-client>
   <!-- Entry for the hadoop-hdfs-fuse package (mountable HDFS): metadata plus three
        declared dependencies (hadoop-client, hadoop, hadoop-libhdfs).
        NOTE(review): the description stops mid-sentence after "using". This presumably
        mirrors the description shipped in the package; confirm against the package
        before completing the sentence here.
        NOTE(review): "/self" presumably means "same version as this package"; confirm. -->
   <hadoop-hdfs-fuse>
     <metadata>
       <summary>Mountable HDFS</summary>
       <description>These projects (enumerated below) allow HDFS to be mounted (on most flavors of Unix) as a standard file system using</description>
       <url>http://hadoop.apache.org/core/</url>
     </metadata>
     <deps>
       <hadoop-client>/self</hadoop-client>
       <hadoop>/self</hadoop>
       <hadoop-libhdfs>/self</hadoop-libhdfs>
     </deps>
   </hadoop-hdfs-fuse>
   <!-- Entry for the hue-common package: metadata, two declared dependencies
        (hue-server, hue-beeswax) and one alternatives record named hue-conf.
        The hue-conf record lists status "auto", link /etc/hue/conf, and
        /etc/hue/conf.empty as both value and alt.
        NOTE(review): this entry lists hue-server as a dependency while the hue-server
        entry in this file lists hue-common, so the two reference each other; confirm
        that the mutual dependency is intended.
        NOTE(review): "/self" presumably means "same version as this package"; confirm. -->
   <hue-common>
     <metadata>
       <summary>A browser-based desktop interface for Hadoop</summary>
       <description>Hue is a browser-based desktop interface for interacting with Hadoop.
 It supports a file browser, job tracker interface, cluster health monitor, and more.</description>
       <url>http://github.com/cloudera/hue</url>
     </metadata>
     <deps>
       <hue-server>/self</hue-server>
       <hue-beeswax>/self</hue-beeswax>
     </deps>
     <alternatives>
       <hue-conf>
         <status>auto</status>
         <link>/etc/hue/conf</link>
         <value>/etc/hue/conf.empty</value>
         <alt>/etc/hue/conf.empty</alt>
       </hue-conf>
     </alternatives>
   </hue-common>
   <!-- Entry for the hue-server package (service scripts for Hue): metadata plus a
        single declared dependency on hue-common.
        NOTE(review): "/self" presumably means "same version as this package"; confirm. -->
   <hue-server>
     <metadata>
       <summary>Service Scripts for Hue</summary>
       <description>This package provides the service scripts for Hue server.</description>
       <url>http://github.com/cloudera/hue</url>
     </metadata>
     <deps>
       <hue-common>/self</hue-common>
     </deps>
   </hue-server>
   <!-- Entry for the hue-doc package: metadata only.
        The deps and alternatives containers are present but hold no child elements,
        so no dependencies or alternatives are declared.
        NOTE(review): whether the reader of this file treats an empty container
        differently from an omitted one is not visible here; confirm before removing them. -->
   <hue-doc>
     <metadata>
       <summary>Documentation for Hue</summary>
       <description>This package provides the installation manual, user guide, SDK documentation, and release notes.</description>
       <url>http://github.com/cloudera/hue</url>
     </metadata>
     <deps>
     </deps>
     <alternatives>
     </alternatives>
   </hue-doc>
   <!-- Entry for the hue-beeswax package (Hive UI for Hue): metadata plus three
        declared dependencies. hue-common carries the "/self" value; hive and make are
        empty elements with no value.
        The description contains a blank line and embedded line breaks; they are part
        of the text node and should be left untouched.
        NOTE(review): "/self" presumably means "same version as this package", and an
        empty dependency element presumably means "any version"; confirm both. -->
   <hue-beeswax>
     <metadata>
       <summary>A UI for Hive on Hue</summary>
       <description>Beeswax is a web interface for Hive.

 It allows users to construct and run queries on Hive, manage tables,
 and import and export data.</description>
       <url>http://github.com/cloudera/hue</url>
     </metadata>
     <deps>
       <hue-common>/self</hue-common>
       <hive/>
       <make/>
     </deps>
   </hue-beeswax>
   <!-- Entry for the hue-pig package (Pig UI for Hue): metadata plus three declared
        dependencies. hue-common carries the "/self" value; make and pig are empty
        elements with no value.
        The description contains a blank line; it is part of the text node and should
        be left untouched.
        NOTE(review): "/self" presumably means "same version as this package", and an
        empty dependency element presumably means "any version"; confirm both. -->
   <hue-pig>
     <metadata>
       <summary>A UI for Pig on Hue</summary>
       <description>A web interface for Pig.

 It allows users to construct and run Pig jobs.</description>
       <url>http://github.com/cloudera/hue</url>
     </metadata>
     <deps>
       <hue-common>/self</hue-common>
       <make/>
       <pig/>
     </deps>
   </hue-pig>
   <!-- Entry for the hue metapackage: metadata plus two declared dependencies
        (hue-server, hue-beeswax).
        NOTE(review): "/self" presumably means "same version as this package"; confirm. -->
   <hue>
     <metadata>
       <summary>The hue metapackage</summary>
       <description>Hue is a browser-based desktop interface for interacting with Hadoop. It supports a file browser, job tracker interface, cluster health monitor, and more.</description>
       <url>http://github.com/cloudera/hue</url>
     </metadata>
     <deps>
       <hue-server>/self</hue-server>
       <hue-beeswax>/self</hue-beeswax>
     </deps>
   </hue>
 </packages>