| <?xml version="1.0" encoding="UTF-8" standalone="no"?> |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| <packages> |
| <bigtop-utils> |
| <metadata> |
| <summary>Collection of useful tools for Bigtop</summary> |
| <description>This includes a collection of useful tools and files for Bigtop</description> |
| <url>http://bigtop.apache.org/</url> |
| </metadata> |
| </bigtop-utils> |
| <bigtop-jsvc> |
| <metadata> |
| <summary>Application to launch java daemon</summary> |
| <description>jsvc executes classfile that implements a Daemon interface.</description> |
| <url>http://commons.apache.org/daemon/</url> |
| </metadata> |
| </bigtop-jsvc> |
| |
| <bigtop-groovy> |
| <metadata> |
| <summary>An agile and dynamic language for the Java Virtual Machine</summary> |
| <description>Groovy provides a JVM based runtime environment for function programming and scripting.</description> |
| <url>http://groovy.codehaus.org/</url> |
| </metadata> |
| <deps> |
| <bigtop-utils/> |
| </deps> |
| </bigtop-groovy> |
| |
| <bigtop-tomcat> |
| <metadata> |
| <summary>Apache Tomcat</summary> |
| <description>Apache Tomcat is an open source software implementation of the |
| Java Servlet and JavaServer Pages technologies.</description> |
| <url>http://tomcat.apache.org/</url> |
| </metadata> |
| </bigtop-tomcat> |
| <spark-core> |
| <metadata> |
| <summary>Lightning-Fast Cluster Computing</summary> |
| <description>Spark is a MapReduce-like cluster computing framework designed to support |
| low-latency iterative jobs and interactive use from an interpreter. It is |
| written in Scala, a high-level language for the JVM, and exposes a clean |
| language-integrated syntax that makes it easy to write parallel jobs. |
| Spark runs on top of the Apache Mesos cluster manager.</description> |
| <url>http://spark.apache.org/</url> |
| </metadata> |
| <deps> |
| <bigtop-utils/> |
| </deps> |
| <alternatives> |
| <spark-conf> |
| <status>auto</status> |
| <link>/etc/spark/conf</link> |
| <value>/etc/spark/conf.dist</value> |
| <alt>/etc/spark/conf.dist</alt> |
| </spark-conf> |
| </alternatives> |
| </spark-core> |
| <!-- Expected metadata and dependencies for the spark-master package. --> |
| <!-- The base Spark package is declared in this file as spark-core, so the dependency uses that name. --> |
| <!-- NOTE(review): "/self" presumably ties the dependency to this package's own version; confirm against the consumer of this file. --> |
| <spark-master> |
| <metadata> |
| <summary>Server for Spark master</summary> |
| <description>Server for Spark master</description> |
| <url>http://spark.apache.org/</url> |
| </metadata> |
| <deps> |
| <spark-core>/self</spark-core> |
| </deps> |
| </spark-master> |
| <!-- Expected metadata and dependencies for the spark-worker package. --> |
| <!-- The base Spark package is declared in this file as spark-core, so the dependency uses that name. --> |
| <!-- NOTE(review): "/self" presumably ties the dependency to this package's own version; confirm against the consumer of this file. --> |
| <spark-worker> |
| <metadata> |
| <summary>Server for Spark worker</summary> |
| <description>Server for Spark worker</description> |
| <url>http://spark.apache.org/</url> |
| </metadata> |
| <deps> |
| <spark-core>/self</spark-core> |
| </deps> |
| </spark-worker> |
| <!-- Expected metadata and dependencies for the spark-python package. --> |
| <!-- The base Spark package is declared in this file as spark-core, so the dependency uses that name. --> |
| <!-- NOTE(review): "/self" presumably ties the dependency to this package's own version; confirm against the consumer of this file. --> |
| <spark-python> |
| <metadata> |
| <summary>Python client for Spark</summary> |
| <description>Includes PySpark, an interactive Python shell for Spark, and related libraries</description> |
| <url>http://spark.apache.org/</url> |
| </metadata> |
| <deps> |
| <spark-core>/self</spark-core> |
| </deps> |
| </spark-python> |
| <flume> |
| <metadata> |
| <summary>Flume is a reliable, scalable, and manageable distributed log collection application for collecting data such as logs and delivering it to data stores such as Hadoop's HDFS.</summary> |
| <description>Flume is a reliable, scalable, and manageable distributed data collection |
| application for collecting data such as logs and delivering it to data stores |
| such as Hadoop's HDFS. It can efficiently collect, aggregate, and move large |
| amounts of log data. It has a simple, but flexible, architecture based on |
| streaming data flows. It is robust and fault tolerant with tunable reliability |
| mechanisms and many failover and recovery mechanisms. The system is centrally |
| managed and allows for intelligent dynamic management. It uses a simple |
| extensible data model that allows for online analytic applications.</description> |
| <url>http://incubator.apache.org/projects/flume.html</url> |
| </metadata> |
| <deps> |
| <zookeeper/> |
| <hadoop/> |
| <bigtop-utils/> |
| </deps> |
| <groups> |
| <flume> |
| <user>flume</user> |
| </flume> |
| </groups> |
| <alternatives> |
| <flume-conf> |
| <status>auto</status> |
| <link>/etc/flume/conf</link> |
| <value>/etc/flume/conf.empty</value> |
| <alt>/etc/flume/conf.empty</alt> |
| </flume-conf> |
| </alternatives> |
| </flume> |
| <flume-agent> |
| <metadata> |
| <summary>The flume agent daemon is a core element of flume's data path and is responsible for generating, processing, and delivering data.</summary> |
| <description>Flume is a reliable, scalable, and manageable distributed data collection application for collecting data such as logs and delivering it to data stores such as Hadoop's HDFS. It can efficiently collect, aggregate, and move large amounts of log data. It has a simple, but flexible, architecture based on streaming data flows. It is robust and fault tolerant with tunable reliability mechanisms and many failover and recovery mechanisms. The system is centrally managed and allows for intelligent dynamic management. It uses a simple extensible data model that allows for online analytic applications.</description> |
| <url>http://incubator.apache.org/projects/flume.html</url> |
| </metadata> |
| <deps> |
| <flume>/self</flume> |
| </deps> |
| </flume-agent> |
| <solr> |
| <metadata> |
| <summary>Apache Solr is the popular, blazing fast open source enterprise search platform</summary> |
| <description>Solr is the popular, blazing fast open source enterprise search platform from |
| the Apache Lucene project. Its major features include powerful full-text |
| search, hit highlighting, faceted search, dynamic clustering, database |
| integration, rich document (e.g., Word, PDF) handling, and geospatial search. |
| Solr is highly scalable, providing distributed search and index replication, |
| and it powers the search and navigation features of many of the world's |
| largest internet sites. |
| |
| Solr is written in Java and runs as a standalone full-text search server within |
| a servlet container such as Tomcat. Solr uses the Lucene Java search library at |
| its core for full-text indexing and search, and has REST-like HTTP/XML and JSON |
| APIs that make it easy to use from virtually any programming language. Solr's |
| powerful external configuration allows it to be tailored to almost any type of |
| application without Java coding, and it has an extensive plugin architecture |
| when more advanced customization is required.</description> |
| <url>http://lucene.apache.org/solr</url> |
| </metadata> |
| <deps> |
| <bigtop-tomcat/> |
| <bigtop-utils/> |
| </deps> |
| <alternatives> |
| </alternatives> |
| </solr> |
| <solr-doc> |
| <metadata> |
| <summary>Documentation for Apache Solr</summary> |
| <description>Documentation for Apache Solr</description> |
| <url>http://lucene.apache.org/solr</url> |
| </metadata> |
| <deps> |
| </deps> |
| <alternatives> |
| </alternatives> |
| </solr-doc> |
| <!-- Expected metadata for the solr-server package; deps and alternatives are present but empty. --> |
| <!-- NOTE(review): other daemon packages in this file (e.g. sqoop-server, zookeeper-server, flume-agent) declare a /self dependency on their base package; no dependency on solr is declared here. Confirm this is intentional. --> |
| <solr-server> |
| <metadata> |
| <summary>The Solr server</summary> |
| <description>This package starts the Solr server on startup</description> |
| <url>http://lucene.apache.org/solr</url> |
| </metadata> |
| <deps> |
| </deps> |
| <alternatives> |
| </alternatives> |
| </solr-server> |
| <sqoop> |
| <metadata> |
| <summary>Tool for easy imports and exports of data sets between databases and the Hadoop ecosystem</summary> |
| <description>Sqoop is a tool that provides the ability to import and export data sets between the Hadoop Distributed File System (HDFS) and relational databases.</description> |
| <url>http://sqoop.apache.org</url> |
| </metadata> |
| <deps> |
| <bigtop-tomcat/> |
| <bigtop-utils/> |
| <hadoop-client/> |
| <sqoop-client>/self</sqoop-client> |
| </deps> |
| <alternatives> |
| <sqoop-conf> |
| <status>auto</status> |
| <link>/etc/sqoop/conf</link> |
| <value>/etc/sqoop/conf.dist</value> |
| <alt>/etc/sqoop/conf.dist</alt> |
| </sqoop-conf> |
| </alternatives> |
| <groups> |
| <sqoop> |
| <user>sqoop</user> |
| </sqoop> |
| </groups> |
| </sqoop> |
| <sqoop-server> |
| <metadata> |
| <summary>Server for Sqoop.</summary> |
| <description>Centralized server for Sqoop.</description> |
| <url>http://sqoop.apache.org</url> |
| </metadata> |
| <deps> |
| <sqoop>/self</sqoop> |
| </deps> |
| </sqoop-server> |
| <sqoop-client> |
| <metadata> |
| <summary>Client for Sqoop.</summary> |
| <description>Lightweight client for Sqoop.</description> |
| <url>http://sqoop.apache.org</url> |
| </metadata> |
| </sqoop-client> |
| <oozie> |
| <metadata> |
| <summary>Oozie is a system that runs workflows of Hadoop jobs.</summary> |
| <description> Oozie is a system that runs workflows of Hadoop jobs. |
| Oozie workflows are actions arranged in a control dependency DAG (Direct |
| Acyclic Graph). |
| |
| Oozie coordinator functionality allows to start workflows at regular |
| frequencies and when data becomes available in HDFS. |
| |
| An Oozie workflow may contain the following types of actions nodes: |
| map-reduce, map-reduce streaming, map-reduce pipes, pig, file-system, |
| sub-workflows, java, hive, sqoop and ssh (deprecated). |
| |
| Flow control operations within the workflow can be done using decision, |
| fork and join nodes. Cycles in workflows are not supported. |
| |
| Actions and decisions can be parameterized with job properties, actions |
| output (i.e. Hadoop counters) and HDFS file information (file exists, |
| file size, etc). Formal parameters are expressed in the workflow definition |
| as ${VARIABLE NAME} variables. |
| |
| A Workflow application is an HDFS directory that contains the workflow |
| definition (an XML file), all the necessary files to run all the actions: |
| JAR files for Map/Reduce jobs, shells for streaming Map/Reduce jobs, native |
| libraries, Pig scripts, and other resource files. |
| |
| Running workflow jobs is done via command line tools, a WebServices API |
| or a Java API. |
| |
| Monitoring the system and workflow jobs can be done via a web console, the |
| command line tools, the WebServices API and the Java API. |
| |
| Oozie is a transactional system and it has built in automatic and manual |
| retry capabilities. |
| |
| In case of workflow job failure, the workflow job can be rerun skipping |
| previously completed actions, the workflow application can be patched before |
| being rerun.</description> |
| <url>http://oozie.apache.org/</url> |
| </metadata> |
| <deps> |
| <oozie-client>/self</oozie-client> |
| </deps> |
| <groups> |
| <oozie> |
| <user>oozie</user> |
| </oozie> |
| </groups> |
| <alternatives> |
| <oozie-conf> |
| <status>auto</status> |
| <link>/etc/oozie/conf</link> |
| <value>/etc/oozie/conf.dist</value> |
| <alt>/etc/oozie/conf.dist</alt> |
| </oozie-conf> |
| </alternatives> |
| </oozie> |
| <oozie-client> |
| <metadata> |
| <summary>Client for Oozie Workflow Engine</summary> |
| <description>Oozie client is a command line client utility that allows remote |
| administration and monitoring of worflows. Using this client |
| utility you can submit worflows, start/suspend/resume/kill |
| workflows and find out their status at any instance. Apart from |
| such operations, you can also change the status of the entire |
| system, get vesion information. This client utility also allows |
| you to validate any worflows before they are deployed to the Oozie |
| server.</description> |
| <url>http://oozie.apache.org/</url> |
| </metadata> |
| <deps> |
| <hadoop/> |
| <bigtop-utils/> |
| </deps> |
| </oozie-client> |
| <zookeeper> |
| <metadata> |
| <summary>A high-performance coordination service for distributed applications.</summary> |
| <description>ZooKeeper is a centralized service for maintaining configuration information, naming, providing distributed synchronization, and providing group services. All of these kinds of services are used in some form or another by distributed applications. Each time they are implemented there is a lot of work that goes into fixing the bugs and race conditions that are inevitable. Because of the difficulty of implementing these kinds of services, applications initially usually skimp on them ,which make them brittle in the presence of change and difficult to manage. Even when done correctly, different implementations of these services lead to management complexity when the applications are deployed. |
| </description> |
| <url>http://zookeeper.apache.org/</url> |
| </metadata> |
| <deps> |
| <bigtop-utils/> |
| </deps> |
| <groups> |
| <zookeeper> |
| <user>zookeeper</user> |
| </zookeeper> |
| </groups> |
| <alternatives> |
| <zookeeper-conf> |
| <status>auto</status> |
| <link>/etc/zookeeper/conf</link> |
| <value>/etc/zookeeper/conf.dist</value> |
| <alt>/etc/zookeeper/conf.dist</alt> |
| </zookeeper-conf> |
| </alternatives> |
| </zookeeper> |
| <zookeeper-server> |
| <metadata> |
| <summary>The Hadoop Zookeeper server</summary> |
| <description>This package starts the zookeeper server on startup</description> |
| <url>http://zookeeper.apache.org/</url> |
| </metadata> |
| <deps> |
| <zookeeper>/self</zookeeper> |
| </deps> |
| </zookeeper-server> |
| <hive-hcatalog> |
| <metadata> |
| <summary>Apache Hcatalog is a data warehouse infrastructure built on top of Hadoop</summary> |
| <description>Apache HCatalog is a table and storage management service for data created using Apache Hadoop. This includes: * Providing a shared schema and data type mechanism. * Providing a table abstraction so that users need not be concerned with where or how their data is stored. * Providing interoperability across data processing tools such as Pig, Map Reduce, Streaming, and Hive. |
| </description> |
| <url>http://hive.apache.org/</url> |
| </metadata> |
| <deps> |
| <hadoop/> |
| <bigtop-utils/> |
| <hive/> |
| </deps> |
| <alternatives> |
| <hive-hcatalog-conf> |
| <status>auto</status> |
| <value>/etc/hive-hcatalog/conf.dist</value> |
| <link>/etc/hive-hcatalog/conf</link> |
| <alt>/etc/hive-hcatalog/conf.dist</alt> |
| </hive-hcatalog-conf> |
| </alternatives> |
| </hive-hcatalog> |
| <hive-hcatalog-server> |
| <metadata> |
| <summary>Init scripts for HCatalog server</summary> |
| <description>Init scripts for HCatalog server</description> |
| <url>http://hive.apache.org/</url> |
| </metadata> |
| <deps> |
| <hive-hcatalog>/self</hive-hcatalog> |
| </deps> |
| </hive-hcatalog-server> |
| <hive-jdbc> |
| <metadata> |
| <summary>Provides libraries necessary to connect to Apache Hive via JDBC</summary> |
| <description>This package provides libraries necessary to connect to Apache Hive via JDBC</description> |
| <url>http://hive.apache.org/</url> |
| </metadata> |
| <deps> |
| <hadoop-client/> |
| </deps> |
| </hive-jdbc> |
| <hive> |
| <metadata> |
| <summary>Hive is a data warehouse infrastructure built on top of Hadoop</summary> |
| <description>Hive is a data warehouse infrastructure built on top of Hadoop that |
| provides tools to enable easy data summarization, adhoc querying and |
| analysis of large datasets data stored in Hadoop files. It provides a |
| mechanism to put structure on this data and it also provides a simple |
| query language called Hive QL which is based on SQL and which enables |
| users familiar with SQL to query this data. At the same time, this |
| language also allows traditional map/reduce programmers to be able to |
| plug in their custom mappers and reducers to do more sophisticated |
| analysis which may not be supported by the built-in capabilities of |
| the language.</description> |
| <url>http://hive.apache.org/</url> |
| </metadata> |
| <deps> |
| <hadoop/> |
| <bigtop-utils/> |
| <hive-jdbc>/self</hive-jdbc> |
| </deps> |
| <alternatives> |
| <hive-conf> |
| <status>auto</status> |
| <value>/etc/hive/conf.dist</value> |
| <link>/etc/hive/conf</link> |
| <alt>/etc/hive/conf.dist</alt> |
| </hive-conf> |
| </alternatives> |
| </hive> |
| <hive-metastore> |
| <metadata> |
| <summary>Shared metadata repository for Hive.</summary> |
| <description>This optional package hosts a metadata server for Hive clients across a network to use.</description> |
| <url>http://hive.apache.org/</url> |
| </metadata> |
| <deps> |
| <hive>/self</hive> |
| </deps> |
| <groups> |
| <hive> |
| <user>hive</user> |
| </hive> |
| </groups> |
| </hive-metastore> |
| <hive-server> |
| <metadata> |
| <summary>Provides a Hive Thrift service.</summary> |
| <description>This optional package hosts a Thrift server for Hive clients across a network to use.</description> |
| <url>http://hive.apache.org/</url> |
| </metadata> |
| <deps> |
| <hive>/self</hive> |
| </deps> |
| <groups> |
| <hive> |
| <user>hive</user> |
| </hive> |
| </groups> |
| </hive-server> |
| <hive-hbase> |
| <metadata> |
| <summary>Provides integration between Apache HBase and Apache Hive</summary> |
| <description>This optional package provides integration between Apache HBase and Apache Hive</description> |
| <url>http://hive.apache.org/</url> |
| </metadata> |
| <deps> |
| <hive>/self</hive> |
| <hbase/> |
| </deps> |
| </hive-hbase> |
| <hive-webhcat> |
| <metadata> |
| <summary>WebHcat provides a REST-like web API for HCatalog and related Hadoop components.</summary> |
| <description>WebHcat provides a REST-like web API for HCatalog and related Hadoop components.</description> |
| <url>http://hive.apache.org/</url> |
| </metadata> |
| <deps> |
| <hive-hcatalog>/self</hive-hcatalog> |
| </deps> |
| <alternatives> |
| <hive-webhcat-conf> |
| <status>auto</status> |
| <value>/etc/hive-webhcat/conf.dist</value> |
| <link>/etc/hive-webhcat/conf</link> |
| <alt>/etc/hive-webhcat/conf.dist</alt> |
| </hive-webhcat-conf> |
| </alternatives> |
| </hive-webhcat> |
| <hive-webhcat-server> |
| <metadata> |
| <summary>Init scripts for WebHcat server</summary> |
| <description>Init scripts for WebHcat server.</description> |
| <url>http://hive.apache.org/</url> |
| </metadata> |
| <deps> |
| <hive-webhcat>/self</hive-webhcat> |
| </deps> |
| </hive-webhcat-server> |
| <ignite-hadoop> |
| <metadata> |
| <summary>Apache Ignite Hadoop accelerator. The system provides for in-memory caching of HDFS data and MR performance improvements</summary> |
| <description>Apache Ignite is an open-source, distributed, in-memory computation platform |
| |
| * HDFS caching and MR performance booster |
| |
| </description> |
| <url>https://ignite.incubator.apache.org/</url> |
| </metadata> |
| <deps> |
| <hadoop-hdfs/> |
| <hadoop-mapreduce/> |
| <bigtop-utils/> |
| </deps> |
| <alternatives> |
| <ignite-hadoop-conf> |
| <status>auto</status> |
| <link>/etc/ignite-hadoop/conf</link> |
| <value>/etc/ignite-hadoop/conf.dist</value> |
| <alt>/etc/ignite-hadoop/conf.dist</alt> |
| </ignite-hadoop-conf> |
| </alternatives> |
| </ignite-hadoop> |
| <ignite-hadoop-service> |
| <metadata> |
| <summary>Apache Ignite Hadoop Accelerator platform</summary> |
| <description>Ignite is an open-source, distributed, in-memory computation platform</description> |
| <url>https://ignite.incubator.apache.org/</url> |
| </metadata> |
| <deps> |
| <ignite-hadoop/> |
| </deps> |
| </ignite-hadoop-service> |
| <ignite-hadoop-doc> |
| <metadata> |
| <summary>Ignite Documentation</summary> |
| <description>Documentation for Apache Ignite platform</description> |
| <url>https://ignite.incubator.apache.org/</url> |
| </metadata> |
| </ignite-hadoop-doc> |
| <hbase> |
| <metadata> |
| <summary>HBase is the Hadoop database. Use it when you need random, realtime read/write access to your Big Data. This project's goal is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware.</summary> |
| <description>HBase is an open-source, distributed, column-oriented store modeled after Google' Bigtable: A Distributed Storage System for Structured Data by Chang et al. Just as Bigtable leverages the distributed data storage provided by the Google File System, HBase provides Bigtable-like capabilities on top of Hadoop. HBase includes: |
| |
| * Convenient base classes for backing Hadoop MapReduce jobs with HBase tables |
| * Query predicate push down via server side scan and get filters |
| * Optimizations for real time queries |
| * A high performance Thrift gateway |
| * A REST-ful Web service gateway that supports XML, Protobuf, and binary data encoding options |
| * Cascading source and sink modules |
| * Extensible jruby-based (JIRB) shell |
| * Support for exporting metrics via the Hadoop metrics subsystem to files or Ganglia; or via JMX</description> |
| <url>http://hbase.apache.org/</url> |
| </metadata> |
| <deps> |
| <zookeeper/> |
| <hadoop/> |
| <bigtop-utils/> |
| </deps> |
| <alternatives> |
| <hbase-conf> |
| <status>auto</status> |
| <value>/etc/hbase/conf.dist</value> |
| <link>/etc/hbase/conf</link> |
| <alt>/etc/hbase/conf.dist</alt> |
| </hbase-conf> |
| </alternatives> |
| <groups> |
| <hbase> |
| <user>hbase</user> |
| </hbase> |
| </groups> |
| </hbase> |
| <hbase-doc> |
| <metadata> |
| <summary>Hbase Documentation</summary> |
| <description>Documentation for Hbase</description> |
| <url>http://hbase.apache.org/</url> |
| </metadata> |
| </hbase-doc> |
| <hbase-master> |
| <metadata> |
| <summary>The Hadoop HBase master Server.</summary> |
| <description>HMaster is the "master server" for a HBase. There is only one HMaster for a single HBase deployment.</description> |
| <url>http://hbase.apache.org/</url> |
| </metadata> |
| <deps> |
| <hbase>/self</hbase> |
| </deps> |
| </hbase-master> |
| <hbase-regionserver> |
| <metadata> |
| <summary>The Hadoop HBase RegionServer server.</summary> |
| <description>HRegionServer makes a set of HRegions available to clients. It checks in with the HMaster. There are many HRegionServers in a single HBase deployment.</description> |
| <url>http://hbase.apache.org/</url> |
| </metadata> |
| <deps> |
| <hbase>/self</hbase> |
| </deps> |
| </hbase-regionserver> |
| <hbase-thrift> |
| <metadata> |
| <summary>The Hadoop HBase Thrift Interface</summary> |
| <description>ThriftServer - this class starts up a Thrift server which implements the Hbase API specified in the Hbase.thrift IDL file. |
| "Thrift is a software framework for scalable cross-language services development. It combines a powerful software stack with a code generation engine to build services that work efficiently and seamlessly between C++, Java, Python, PHP, and Ruby. Thrift was developed at Facebook, and we are now releasing it as open source." For additional information, see http://developers.facebook.com/thrift/. Facebook has announced their intent to migrate Thrift into Apache Incubator.</description> |
| <url>http://hbase.apache.org/</url> |
| </metadata> |
| <deps> |
| <hbase>/self</hbase> |
| </deps> |
| </hbase-thrift> |
| <hbase-rest> |
| <metadata> |
| <summary>The Apache HBase REST gateway</summary> |
| <description>The Apache HBase REST gateway</description> |
| <url>http://hbase.apache.org/</url> |
| </metadata> |
| <deps> |
| <hbase>/self</hbase> |
| </deps> |
| </hbase-rest> |
| <phoenix> |
| <metadata> |
| <summary>Phoenix is a SQL skin over HBase and client-embedded JDBC driver.</summary> |
| <description>Phoenix is a SQL skin over HBase, delivered as a client-embedded JDBC driver. |
| The Phoenix query engine transforms an SQL query into one or more HBase scans, |
| and orchestrates their execution to produce standard JDBC result sets. Direct |
| use of the HBase API, along with coprocessors and custom filters, results in |
| performance on the order of milliseconds for small queries, or seconds for |
| tens of millions of rows. Applications interact with Phoenix through a |
| standard JDBC interface; all the usual interfaces are supported.</description> |
| <url>http://phoenix.apache.org</url> |
| </metadata> |
| <deps> |
| <zookeeper/> |
| <hadoop/> |
| <hadoop-mapreduce/> |
| <hadoop-yarn/> |
| <hbase/> |
| </deps> |
| <alternatives> |
| <phoenix-conf> |
| <status>auto</status> |
| <value>/etc/phoenix/conf.dist</value> |
| <link>/etc/phoenix/conf</link> |
| <alt>/etc/phoenix/conf.dist</alt> |
| </phoenix-conf> |
| </alternatives> |
| </phoenix> |
| <hadoop> |
| <metadata> |
| <summary>Hadoop is a software platform for processing vast amounts of data</summary> |
| <description>Hadoop is a software platform that lets one easily write and |
| run applications that process vast amounts of data. |
| |
| Here's what makes Hadoop especially useful: |
| * Scalable: Hadoop can reliably store and process petabytes. |
| * Economical: It distributes the data and processing across clusters |
| of commonly available computers. These clusters can number |
| into the thousands of nodes. |
| * Efficient: By distributing the data, Hadoop can process it in parallel |
| on the nodes where the data is located. This makes it |
| extremely rapid. |
| * Reliable: Hadoop automatically maintains multiple copies of data and |
| automatically redeploys computing tasks based on failures. |
| |
| Hadoop implements MapReduce, using the Hadoop Distributed File System (HDFS). |
| MapReduce divides applications into many small blocks of work. HDFS creates |
| multiple replicas of data blocks for reliability, placing them on compute |
| nodes around the cluster. MapReduce can then process the data where it is |
| located.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> |
| <bigtop-utils/> |
| </deps> |
| <alternatives> |
| <hadoop-conf> |
| <status>auto</status> |
| <link>/etc/hadoop/conf</link> |
| <value>/etc/hadoop/conf.empty</value> |
| <alt>/etc/hadoop/conf.empty</alt> |
| </hadoop-conf> |
| </alternatives> |
| </hadoop> |
| <hadoop-hdfs> |
| <metadata> |
| <summary>The Hadoop Distributed File System</summary> |
| <description>Hadoop Distributed File System (HDFS) is the primary storage system used by |
| Hadoop applications. HDFS creates multiple replicas of data blocks and distributes |
| them on compute nodes throughout a cluster to enable reliable, extremely rapid |
| computations.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> |
| <hadoop>/self</hadoop> |
| <bigtop-utils/> |
| </deps> |
| <groups> |
| <hdfs> |
| <user>hdfs</user> |
| </hdfs> |
| </groups> |
| </hadoop-hdfs> |
| <hadoop-yarn> |
| <metadata> |
| <summary>The Hadoop NextGen MapReduce (YARN)</summary> |
| <description>YARN (Hadoop NextGen MapReduce) is a general purpose data-computation framework. |
| The fundamental idea of YARN is to split up the two major functionalities of the |
| JobTracker, resource management and job scheduling/monitoring, into separate daemons: |
| ResourceManager and NodeManager. |
| |
| The ResourceManager is the ultimate authority that arbitrates resources among all |
| the applications in the system. The NodeManager is a per-node slave managing allocation |
| of computational resources on a single node. Both work in support of per-application |
| ApplicationMaster (AM). |
| |
| An ApplicationMaster is, in effect, a framework specific library and is tasked with |
| negotiating resources from the ResourceManager and working with the NodeManager(s) to |
| execute and monitor the tasks.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> |
| <hadoop>/self</hadoop> |
| <bigtop-utils/> |
| </deps> |
| <groups> |
| <yarn> |
| <user>yarn</user> |
| </yarn> |
| </groups> |
| </hadoop-yarn> |
| <hadoop-mapreduce> |
| <metadata> |
| <summary>The Hadoop MapReduce (MRv2)</summary> |
| <description>Hadoop MapReduce is a programming model and software framework for writing applications |
| that rapidly process vast amounts of data in parallel on large clusters of compute nodes.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> |
| <hadoop-yarn>/self</hadoop-yarn> |
| <bigtop-utils/> |
| </deps> |
| <groups> |
| <mapred> |
| <user>mapred</user> |
| </mapred> |
| </groups> |
| </hadoop-mapreduce> |
| <hadoop-httpfs> |
| <metadata> |
| <summary>HTTPFS for Hadoop</summary> |
| <description>The server providing HTTP REST API support for the complete FileSystem/FileContext |
| interface in HDFS.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> |
| <hadoop-hdfs>/self</hadoop-hdfs> |
| <bigtop-utils/> |
| </deps> |
| <groups> |
| <httpfs> |
| <user>httpfs</user> |
| </httpfs> |
| </groups> |
| </hadoop-httpfs> |
| <hadoop-hdfs-namenode> |
| <metadata> |
| <summary>The Hadoop namenode manages the block locations of HDFS files</summary> |
| <description>The Hadoop Distributed Filesystem (HDFS) requires one unique server, the |
| namenode, which manages the block locations of files on the filesystem.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> |
| <hadoop-hdfs>/self</hadoop-hdfs> |
| </deps> |
| </hadoop-hdfs-namenode> |
| <hadoop-hdfs-zkfc> |
| <metadata> |
| <summary>Hadoop HDFS failover controller</summary> |
| <description>The Hadoop HDFS failover controller is a ZooKeeper client which also |
| monitors and manages the state of the NameNode. Each of the machines |
| which runs a NameNode also runs a ZKFC, and that ZKFC is responsible |
| for: Health monitoring, ZooKeeper session management, ZooKeeper-based |
| election.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> |
| <hadoop-hdfs>/self</hadoop-hdfs> |
| </deps> |
| </hadoop-hdfs-zkfc> |
| <hadoop-hdfs-journalnode> |
| <metadata> |
| <summary>Hadoop HDFS JournalNode</summary> |
| <description>The HDFS JournalNode is responsible for persisting NameNode edit logs. |
| In a typical deployment the JournalNode daemon runs on at least three |
| separate machines in the cluster.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> |
| <hadoop-hdfs>/self</hadoop-hdfs> |
| <hadoop>/self</hadoop> |
| </deps> |
| </hadoop-hdfs-journalnode> |
| <hadoop-hdfs-secondarynamenode> |
| <metadata> |
| <summary>Hadoop Secondary namenode</summary> |
| <description>The Secondary Name Node periodically compacts the Name Node EditLog |
| into a checkpoint. This compaction ensures that Name Node restarts |
| do not incur unnecessary downtime.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> |
| <hadoop-hdfs>/self</hadoop-hdfs> |
| </deps> |
| </hadoop-hdfs-secondarynamenode> |
| <hadoop-hdfs-datanode> |
| <metadata> |
| <summary>Hadoop Data Node</summary> |
| <description>The Data Nodes in the Hadoop Cluster are responsible for serving up |
| blocks of data over the network to Hadoop Distributed Filesystem |
| (HDFS) clients.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> |
| <hadoop-hdfs>/self</hadoop-hdfs> |
| </deps> |
| </hadoop-hdfs-datanode> |
| <hadoop-yarn-resourcemanager> |
| <metadata> |
| <summary>YARN Resource Manager</summary> |
| <description>The resource manager manages the global assignment of compute resources to applications</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> |
| <hadoop-yarn>/self</hadoop-yarn> |
| </deps> |
| </hadoop-yarn-resourcemanager> |
| <hadoop-yarn-nodemanager> <!-- Manifest entry for the hadoop-yarn-nodemanager package: descriptive metadata followed by its dependency list. --> |
| <metadata> |
| <summary>YARN Node Manager</summary> |
| <description>The NodeManager is the per-machine framework agent who is responsible for |
| containers, monitoring their resource usage (cpu, memory, disk, network) and |
| reporting the same to the ResourceManager/Scheduler.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> <!-- Single dependency on hadoop-yarn. The "/self" value presumably means "same version as this package"; TODO confirm against the manifest consumer. --> |
| <hadoop-yarn>/self</hadoop-yarn> |
| </deps> |
| </hadoop-yarn-nodemanager> |
| <hadoop-yarn-proxyserver> <!-- Manifest entry for the hadoop-yarn-proxyserver package: descriptive metadata followed by its dependency list. --> |
| <metadata> |
| <summary>YARN Web Proxy</summary> |
| <description>The web proxy server sits in front of the YARN application master web UI.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> <!-- Single dependency on hadoop-yarn. The "/self" value presumably means "same version as this package"; TODO confirm against the manifest consumer. --> |
| <hadoop-yarn>/self</hadoop-yarn> |
| </deps> |
| </hadoop-yarn-proxyserver> |
| <hadoop-mapreduce-historyserver> <!-- Manifest entry for the hadoop-mapreduce-historyserver package: descriptive metadata followed by its dependency list. --> |
| <metadata> <!-- NOTE(review): the description below reads "a Apache" where "an Apache" is grammatical. The text presumably has to mirror the shipped package description verbatim, so confirm against the packaging before correcting it here. --> |
| <summary>MapReduce History Server</summary> |
| <description>The History server keeps records of the different activities being performed on a Apache Hadoop cluster</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> <!-- Single dependency on hadoop-mapreduce. The "/self" value presumably means "same version as this package"; TODO confirm against the manifest consumer. --> |
| <hadoop-mapreduce>/self</hadoop-mapreduce> |
| </deps> |
| </hadoop-mapreduce-historyserver> |
| <hadoop-conf-pseudo> <!-- Manifest entry for the hadoop-conf-pseudo package: descriptive metadata followed by its dependency list. --> |
| <metadata> |
| <summary>Pseudo-distributed Hadoop configuration</summary> |
| <description>Contains configuration files for a "pseudo-distributed" Hadoop deployment. |
| In this mode, each of the hadoop components runs as a separate Java process, |
| but all on the same machine.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> <!-- Depends on hadoop plus six daemon packages: the HDFS namenode, datanode and secondarynamenode, the YARN resourcemanager and nodemanager, and the MapReduce historyserver. The "/self" value presumably means "same version as this package"; TODO confirm against the manifest consumer. --> |
| <hadoop>/self</hadoop> |
| <hadoop-hdfs-namenode>/self</hadoop-hdfs-namenode> |
| <hadoop-hdfs-datanode>/self</hadoop-hdfs-datanode> |
| <hadoop-hdfs-secondarynamenode>/self</hadoop-hdfs-secondarynamenode> |
| <hadoop-yarn-resourcemanager>/self</hadoop-yarn-resourcemanager> |
| <hadoop-yarn-nodemanager>/self</hadoop-yarn-nodemanager> |
| <hadoop-mapreduce-historyserver>/self</hadoop-mapreduce-historyserver> |
| </deps> |
| </hadoop-conf-pseudo> |
| <hadoop-doc> <!-- Manifest entry for the hadoop-doc package. It carries metadata only: no deps or alternatives section is declared. --> |
| <metadata> |
| <summary>Hadoop Documentation</summary> |
| <description>Documentation for Hadoop</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| </hadoop-doc> |
| <hadoop-client> <!-- Manifest entry for the hadoop-client package: descriptive metadata followed by its dependency list. --> |
| <metadata> |
| <summary>Hadoop client side dependencies</summary> |
| <description>Installation of this package will provide you with all the dependencies for Hadoop clients.</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> <!-- Depends on the four base packages hadoop, hadoop-hdfs, hadoop-yarn and hadoop-mapreduce. The "/self" value presumably means "same version as this package"; TODO confirm against the manifest consumer. --> |
| <hadoop>/self</hadoop> |
| <hadoop-hdfs>/self</hadoop-hdfs> |
| <hadoop-yarn>/self</hadoop-yarn> |
| <hadoop-mapreduce>/self</hadoop-mapreduce> |
| </deps> |
| </hadoop-client> |
| <hadoop-hdfs-fuse> <!-- Manifest entry for the hadoop-hdfs-fuse package: descriptive metadata followed by its dependency list. --> |
| <metadata> <!-- NOTE(review): the description below stops mid-sentence (it ends with the word "using"). The text presumably has to mirror the shipped package description verbatim, so confirm against the packaging before completing it here. --> |
| <summary>Mountable HDFS</summary> |
| <description>These projects (enumerated below) allow HDFS to be mounted (on most flavors of Unix) as a standard file system using</description> |
| <url>http://hadoop.apache.org/core/</url> |
| </metadata> |
| <deps> <!-- Depends on hadoop-client, hadoop and hadoop-libhdfs. The "/self" value presumably means "same version as this package"; TODO confirm against the manifest consumer. --> |
| <hadoop-client>/self</hadoop-client> |
| <hadoop>/self</hadoop> |
| <hadoop-libhdfs>/self</hadoop-libhdfs> |
| </deps> |
| </hadoop-hdfs-fuse> |
| <hue-common> <!-- Manifest entry for the hue-common package: descriptive metadata, a dependency list and one alternatives entry. --> |
| <metadata> |
| <summary>A browser-based desktop interface for Hadoop</summary> |
| <description>Hue is a browser-based desktop interface for interacting with Hadoop. |
| It supports a file browser, job tracker interface, cluster health monitor, and more.</description> |
| <url>http://github.com/cloudera/hue</url> |
| </metadata> |
| <deps> <!-- NOTE(review): hue-server and hue-beeswax each list hue-common in their own deps, so these entries depend on one another in both directions. Verify that the mutual dependency is intended. --> |
| <hue-server>/self</hue-server> |
| <hue-beeswax>/self</hue-beeswax> |
| </deps> |
| <alternatives> <!-- One entry named hue-conf: status auto, link /etc/hue/conf, with value and alt both set to /etc/hue/conf.empty. Presumably this describes an update-alternatives style managed link; TODO confirm field semantics against the manifest consumer. --> |
| <hue-conf> |
| <status>auto</status> |
| <link>/etc/hue/conf</link> |
| <value>/etc/hue/conf.empty</value> |
| <alt>/etc/hue/conf.empty</alt> |
| </hue-conf> |
| </alternatives> |
| </hue-common> |
| <hue-server> <!-- Manifest entry for the hue-server package: descriptive metadata followed by its dependency list. --> |
| <metadata> |
| <summary>Service Scripts for Hue</summary> |
| <description>This package provides the service scripts for Hue server.</description> |
| <url>http://github.com/cloudera/hue</url> |
| </metadata> |
| <deps> <!-- Single dependency on hue-common. NOTE(review): the hue-common entry lists hue-server in its own deps, so the two depend on each other; verify that this is intended. --> |
| <hue-common>/self</hue-common> |
| </deps> |
| </hue-server> |
| <hue-doc> <!-- Manifest entry for the hue-doc package: descriptive metadata plus deps and alternatives sections that are both empty. --> |
| <metadata> |
| <summary>Documentation for Hue</summary> |
| <description>This package provides the installation manual, user guide, SDK documentation, and release notes.</description> |
| <url>http://github.com/cloudera/hue</url> |
| </metadata> |
| <deps> <!-- NOTE(review): intentionally empty as far as this file shows. The hadoop-doc entry omits the section entirely; presumably the two forms are equivalent for the consumer, TODO confirm. --> |
| </deps> |
| <alternatives> <!-- Empty: no alternatives are declared for this package. --> |
| </alternatives> |
| </hue-doc> |
| <hue-beeswax> <!-- Manifest entry for the hue-beeswax package: descriptive metadata followed by its dependency list. --> |
| <metadata> |
| <summary>A UI for Hive on Hue</summary> |
| <description>Beeswax is a web interface for Hive. |
| |
| It allows users to construct and run queries on Hive, manage tables, |
| and import and export data.</description> |
| <url>http://github.com/cloudera/hue</url> |
| </metadata> |
| <deps> <!-- hue-common carries the "/self" value, while hive and make are empty elements with no value. Presumably an empty element means "any version"; TODO confirm against the manifest consumer. --> |
| <hue-common>/self</hue-common> |
| <hive/> |
| <make/> |
| </deps> |
| </hue-beeswax> |
| <hue-pig> <!-- Manifest entry for the hue-pig package: descriptive metadata followed by its dependency list. --> |
| <metadata> |
| <summary>A UI for Pig on Hue</summary> |
| <description>A web interface for Pig. |
| |
| It allows users to construct and run Pig jobs.</description> |
| <url>http://github.com/cloudera/hue</url> |
| </metadata> |
| <deps> <!-- hue-common carries the "/self" value, while make and pig are empty elements with no value. Presumably an empty element means "any version"; TODO confirm against the manifest consumer. --> |
| <hue-common>/self</hue-common> |
| <make/> |
| <pig/> |
| </deps> |
| </hue-pig> |
| <hue> <!-- Manifest entry for the hue metapackage (per its summary): descriptive metadata followed by its dependency list. --> |
| <metadata> |
| <summary>The hue metapackage</summary> |
| <description>Hue is a browser-based desktop interface for interacting with Hadoop. It supports a file browser, job tracker interface, cluster health monitor, and more.</description> |
| <url>http://github.com/cloudera/hue</url> |
| </metadata> |
| <deps> <!-- Pulls in hue-server and hue-beeswax. NOTE(review): hue-pig is declared in this file but is not listed here; verify whether the metapackage is meant to include it. --> |
| <hue-server>/self</hue-server> |
| <hue-beeswax>/self</hue-beeswax> |
| </deps> |
| </hue> |
| </packages> |