<?xml version="1.0" encoding="UTF-8" ?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<application>
<type>MR_HISTORY_JOB_APP</type>
<name>Map Reduce History Job</name>
<configuration>
<!-- org.apache.eagle.jpm.mr.history.MRHistoryJobConfig -->
<!-- Topology sizing: worker count followed by per-component task counts. -->
<property>
  <name>workers</name>
  <displayName>Worker Number</displayName>
  <description>the number of storm workers will be used</description>
  <value>2</value>
</property>
<!-- Task count for the spout ("Read Task Number"). -->
<property>
  <name>stormConfig.mrHistoryJobSpoutTasks</name>
  <displayName>Read Task Number</displayName>
  <description>the number tasks of the spout will be assigned</description>
  <value>4</value>
</property>
<!-- Task count for the sink bolt of the job stream. -->
<property>
  <name>stormConfig.jobKafkaSinkTasks</name>
  <displayName>Sink Task Number For Job Stream</displayName>
  <description>the number tasks of the sink bolt will be assigned</description>
  <value>1</value>
</property>
<!-- Task count for the sink bolt of the task attempt stream. -->
<property>
  <name>stormConfig.taskAttemptKafkaSinkTasks</name>
  <displayName>Sink Task Number For Task Attempt Stream</displayName>
  <description>the number tasks of the sink bolt will be assigned</description>
  <value>1</value>
</property>
<!-- Endpoint settings: where the history files live and how to reach the
     history server. All four properties are marked required. -->
<!-- Default file system. The default value is a full hdfs:// URI, so the
     description asks for a URI rather than a bare host:port. -->
<property>
  <name>endpointConfig.hdfs.fs.defaultFS</name>
  <displayName>HDFS Address</displayName>
  <description>The name of the default file system: a URI whose scheme and authority determine the FileSystem implementation, e.g. hdfs://namenode-host:8020</description>
  <value>hdfs://sandbox.hortonworks.com:8020</value>
  <required>true</required>
</property>
<!-- Directory holding the map reduce history job files. -->
<property>
  <name>endpointConfig.basePath</name>
  <displayName>Map Reduce History Log File Path</displayName>
  <description>which directory do map reduce history job files locate in</description>
  <value>/mapred/history/done</value>
  <required>true</required>
</property>
<!-- Base URL of the map reduce history server. -->
<property>
  <name>endpointConfig.mrHistoryServerUrl</name>
  <displayName>Map Reduce History Server</displayName>
  <description>map reduce history server url address</description>
  <value>http://sandbox.hortonworks.com:19888</value>
  <required>true</required>
</property>
<!-- NOTE(review): in the IANA tz database the Etc/GMT+N names use an inverted
     sign, so Etc/GMT+7 denotes UTC-7. Confirm this default matches the
     intended cluster time zone. -->
<property>
  <name>endpointConfig.timeZone</name>
  <displayName>Time Zone</displayName>
  <description>which time zone do hdfs data nodes locate in</description>
  <value>Etc/GMT+7</value>
  <required>true</required>
</property>
<!-- Destinations for the two emitted streams. The topic defaults contain a
     ${siteId} placeholder; presumably it is substituted per site by the
     consumer of this descriptor (TODO confirm). -->
<property>
  <name>dataSinkConfig.MAP_REDUCE_JOB_STREAM.topic</name>
  <displayName>Destination(Kafka Topic) Of Job Stream Data</displayName>
  <value>map_reduce_job_${siteId}</value>
  <description>application emits job stream data to this kafka topic</description>
  <required>true</required>
</property>
<property>
  <name>dataSinkConfig.MAP_REDUCE_TASK_ATTEMPT_STREAM.topic</name>
  <displayName>Destination(Kafka Topic) Of Task Attempt Stream Data</displayName>
  <value>map_reduce_task_attempt_${siteId}</value>
  <description>application emits task attempt stream data to this kafka topic</description>
  <required>true</required>
</property>
<!-- Kafka broker list used for stream data. -->
<property>
  <name>dataSinkConfig.brokerList</name>
  <displayName>Broker(Kafka) List Of Stream Data</displayName>
  <value>localhost:6667</value>
  <description>kafka broker list for stream data</description>
  <required>true</required>
</property>
<!-- Kafka ZooKeeper connection. Note that this key lives under the
     dataSourceConfig prefix, unlike its dataSinkConfig neighbours. -->
<property>
  <name>dataSourceConfig.zkConnection</name>
  <displayName>Kafka Zookeeper Quorum</displayName>
  <value>localhost:2181</value>
  <description>kafka zookeeper connection</description>
  <required>true</required>
</property>
<!-- Kafka producer tuning for the data sink. None of these properties is
     marked required. -->
<!-- Serializers for the message value and the message key. -->
<property>
  <name>dataSinkConfig.serializerClass</name>
  <displayName>Serializer Class For Kafka Message Value</displayName>
  <value>kafka.serializer.StringEncoder</value>
  <description>serializer class Kafka message value</description>
</property>
<property>
  <name>dataSinkConfig.keySerializerClass</name>
  <displayName>Serializer Class For Kafka Message Key</displayName>
  <value>kafka.serializer.StringEncoder</value>
  <description>serializer class Kafka message key</description>
</property>
<!-- Send mode. Per their descriptions, the two settings that follow apply
     when async mode is used. -->
<property>
  <name>dataSinkConfig.producerType</name>
  <displayName>Producer Type Of Stream Data</displayName>
  <value>async</value>
  <description>whether the messages are sent asynchronously in a background thread</description>
</property>
<property>
  <name>dataSinkConfig.numBatchMessages</name>
  <displayName>Number Of Message Sent In One Batch</displayName>
  <value>4096</value>
  <description>number of messages to send in one batch when using async mode</description>
</property>
<property>
  <name>dataSinkConfig.maxQueueBufferMs</name>
  <displayName>Maximum Time To Buffer Data</displayName>
  <value>5000</value>
  <description>maximum time to buffer data when using async mode</description>
</property>
<!-- NOTE(review): for the Kafka producer an acks value of 0 presumably means
     the producer does not wait for any broker acknowledgement. Confirm this
     default is intended. -->
<property>
  <name>dataSinkConfig.requestRequiredAcks</name>
  <displayName>Requires Ack For Stream Data</displayName>
  <value>0</value>
  <description>value controls when a produce request is considered completed</description>
</property>
<!-- Comma-separated list of job configuration keys to extract. The value is
     kept on a single line on purpose: whitespace inside element text is
     delivered to the application, so wrapping the list would put a line break
     in front of every key after the first unless the consumer trims. -->
<property>
  <name>MRConfigureKeys.jobConfigKey</name>
  <displayName>Map Reduce Extracted Configuration Keys</displayName>
  <description>which configures will be extracted from map reduce job configurations</description>
  <value>mapreduce.map.output.compress,mapreduce.map.output.compress.codec,mapreduce.output.fileoutputformat.compress,mapreduce.output.fileoutputformat.compress.type,mapreduce.output.fileoutputformat.compress.codec,mapred.output.format.class,dataplatform.etl.info,mapreduce.map.memory.mb,mapreduce.reduce.memory.mb,mapreduce.map.java.opts,mapreduce.reduce.java.opts</value>
</property>
<!-- Job configuration key from which the job name is extracted. -->
<property>
  <name>MRConfigureKeys.jobNameKey</name>
  <displayName>Map Reduce Job Name Key</displayName>
  <description>User use -Dkey=value to specify name of a job when submit. use this to extract job name from job configuration</description>
  <value>eagle.job.name</value>
</property>
</configuration>
<streams>
<!-- Schema of the job stream: 19 columns, validation enabled. -->
<stream>
  <streamId>MAP_REDUCE_JOB_STREAM</streamId>
  <description>Map Reduce Job Stream</description>
  <validate>true</validate>
  <columns>
    <!-- String-typed identifying columns. -->
    <column>
      <name>site</name>
      <type>string</type>
    </column>
    <column>
      <name>queue</name>
      <type>string</type>
    </column>
    <column>
      <name>user</name>
      <type>string</type>
    </column>
    <column>
      <name>jobType</name>
      <type>string</type>
    </column>
    <column>
      <name>jobDefId</name>
      <type>string</type>
    </column>
    <column>
      <name>jobId</name>
      <type>string</type>
    </column>
    <column>
      <name>trackingUrl</name>
      <type>string</type>
    </column>
    <!-- Double-typed "ops" columns; each declares a default of 0.0. -->
    <column>
      <name>totalOpsPerSecond</name>
      <type>double</type>
      <defaultValue>0.0</defaultValue>
    </column>
    <column>
      <name>mapOpsPerSecond</name>
      <type>double</type>
      <defaultValue>0.0</defaultValue>
    </column>
    <column>
      <name>reduceOpsPerSecond</name>
      <type>double</type>
      <defaultValue>0.0</defaultValue>
    </column>
    <column>
      <name>avgOpsPerTask</name>
      <type>double</type>
      <defaultValue>0.0</defaultValue>
    </column>
    <column>
      <name>avgOpsPerMapTask</name>
      <type>double</type>
      <defaultValue>0.0</defaultValue>
    </column>
    <column>
      <name>avgOpsPerReduceTask</name>
      <type>double</type>
      <defaultValue>0.0</defaultValue>
    </column>
    <!-- Double-typed average-time columns; no default is declared. -->
    <column>
      <name>avgMapTime</name>
      <type>double</type>
    </column>
    <column>
      <name>avgReduceTime</name>
      <type>double</type>
    </column>
    <!-- Integer counts, a long duration, and the state string. -->
    <column>
      <name>numTotalMaps</name>
      <type>int</type>
    </column>
    <column>
      <name>numTotalReduces</name>
      <type>int</type>
    </column>
    <column>
      <name>duration</name>
      <type>long</type>
    </column>
    <column>
      <name>currentState</name>
      <type>string</type>
    </column>
  </columns>
</stream>
<!-- Schema of the task attempt stream: 14 columns, validation enabled. -->
<stream>
  <streamId>MAP_REDUCE_TASK_ATTEMPT_STREAM</streamId>
  <description>Map Reduce Task Attempt Stream</description>
  <validate>true</validate>
  <columns>
    <!-- String-typed location and ownership columns. -->
    <column>
      <name>site</name>
      <type>string</type>
    </column>
    <column>
      <name>rack</name>
      <type>string</type>
    </column>
    <column>
      <name>hostname</name>
      <type>string</type>
    </column>
    <column>
      <name>user</name>
      <type>string</type>
    </column>
    <!-- String-typed job, task and attempt identifiers. -->
    <column>
      <name>jobType</name>
      <type>string</type>
    </column>
    <column>
      <name>jobDefId</name>
      <type>string</type>
    </column>
    <column>
      <name>jobId</name>
      <type>string</type>
    </column>
    <column>
      <name>taskId</name>
      <type>string</type>
    </column>
    <column>
      <name>taskType</name>
      <type>string</type>
    </column>
    <column>
      <name>taskAttemptId</name>
      <type>string</type>
    </column>
    <column>
      <name>errorCategory</name>
      <type>string</type>
    </column>
    <!-- Long-typed start and end times, then the status string. -->
    <column>
      <name>startTime</name>
      <type>long</type>
    </column>
    <column>
      <name>endTime</name>
      <type>long</type>
    </column>
    <column>
      <name>taskStatus</name>
      <type>string</type>
    </column>
  </columns>
</stream>
</streams>
<!-- Install / uninstall documentation for this application. Both sections
     currently hold no text other than a line break. -->
<docs>
<install>
</install>
<uninstall>
</uninstall>
</docs>
</application>