blob: 730b70c4d63267cd9890f8170e827bba0cb7606c [file] [log] [blame]
<!DOCTYPE html>
<!--
| Generated by Apache Maven Doxia Site Renderer 1.8 from src/site/markdown/metron-analytics/metron-profiler-spark/index.md at 2019-05-14
| Rendered using Apache Maven Fluido Skin 1.7
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="Date-Revision-yyyymmdd" content="20190514" />
<meta http-equiv="Content-Language" content="en" />
<title>Metron &#x2013; Metron Profiler for Spark</title>
<link rel="stylesheet" href="../../css/apache-maven-fluido-1.7.min.css" />
<link rel="stylesheet" href="../../css/site.css" />
<link rel="stylesheet" href="../../css/print.css" media="print" />
<script type="text/javascript" src="../../js/apache-maven-fluido-1.7.min.js"></script>
<script type="text/javascript">
$( document ).ready( function() { $( '.carousel' ).carousel( { interval: 3500 } ) } );
</script>
</head>
<body class="topBarDisabled">
<div class="container-fluid">
<div id="banner">
<div class="pull-left"><a href="http://metron.apache.org/" id="bannerLeft"><img src="../../images/metron-logo.png" alt="Apache Metron" width="148px" height="48px"/></a></div>
<div class="pull-right"></div>
<div class="clear"><hr/></div>
</div>
<div id="breadcrumbs">
<ul class="breadcrumb">
<li class=""><a href="http://www.apache.org" class="externalLink" title="Apache">Apache</a><span class="divider">/</span></li>
<li class=""><a href="http://metron.apache.org/" class="externalLink" title="Metron">Metron</a><span class="divider">/</span></li>
<li class=""><a href="../../index.html" title="Documentation">Documentation</a><span class="divider">/</span></li>
<li class="active ">Metron Profiler for Spark</li>
<li id="publishDate" class="pull-right"><span class="divider">|</span> Last Published: 2019-05-14</li>
<li id="projectVersion" class="pull-right">Version: 0.7.1</li>
</ul>
</div>
<div class="row-fluid">
<div id="leftColumn" class="span2">
<div class="well sidebar-nav">
<ul class="nav nav-list">
<li class="nav-header">User Documentation</li>
<li><a href="../../index.html" title="Metron"><span class="icon-chevron-down"></span>Metron</a>
<ul class="nav nav-list">
<li><a href="../../CONTRIBUTING.html" title="CONTRIBUTING"><span class="none"></span>CONTRIBUTING</a></li>
<li><a href="../../Upgrading.html" title="Upgrading"><span class="none"></span>Upgrading</a></li>
<li><a href="../../metron-analytics/index.html" title="Analytics"><span class="icon-chevron-down"></span>Analytics</a>
<ul class="nav nav-list">
<li><a href="../../metron-analytics/metron-maas-service/index.html" title="Maas-service"><span class="none"></span>Maas-service</a></li>
<li><a href="../../metron-analytics/metron-profiler-client/index.html" title="Profiler-client"><span class="none"></span>Profiler-client</a></li>
<li><a href="../../metron-analytics/metron-profiler-common/index.html" title="Profiler-common"><span class="none"></span>Profiler-common</a></li>
<li><a href="../../metron-analytics/metron-profiler-repl/index.html" title="Profiler-repl"><span class="none"></span>Profiler-repl</a></li>
<li class="active"><a href="#"><span class="none"></span>Profiler-spark</a></li>
<li><a href="../../metron-analytics/metron-profiler-storm/index.html" title="Profiler-storm"><span class="none"></span>Profiler-storm</a></li>
<li><a href="../../metron-analytics/metron-statistics/index.html" title="Statistics"><span class="icon-chevron-right"></span>Statistics</a></li>
</ul>
</li>
<li><a href="../../metron-contrib/metron-docker/index.html" title="Docker"><span class="none"></span>Docker</a></li>
<li><a href="../../metron-contrib/metron-performance/index.html" title="Performance"><span class="none"></span>Performance</a></li>
<li><a href="../../metron-deployment/index.html" title="Deployment"><span class="icon-chevron-right"></span>Deployment</a></li>
<li><a href="../../metron-interface/index.html" title="Interface"><span class="icon-chevron-right"></span>Interface</a></li>
<li><a href="../../metron-platform/index.html" title="Platform"><span class="icon-chevron-right"></span>Platform</a></li>
<li><a href="../../metron-sensors/index.html" title="Sensors"><span class="icon-chevron-right"></span>Sensors</a></li>
<li><a href="../../metron-stellar/stellar-3rd-party-example/index.html" title="Stellar-3rd-party-example"><span class="none"></span>Stellar-3rd-party-example</a></li>
<li><a href="../../metron-stellar/stellar-common/index.html" title="Stellar-common"><span class="icon-chevron-right"></span>Stellar-common</a></li>
<li><a href="../../metron-stellar/stellar-zeppelin/index.html" title="Stellar-zeppelin"><span class="none"></span>Stellar-zeppelin</a></li>
<li><a href="../../use-cases/index.html" title="Use-cases"><span class="icon-chevron-right"></span>Use-cases</a></li>
</ul>
</li>
</ul>
<hr />
<div id="poweredBy">
<div class="clear"></div>
<div class="clear"></div>
<div class="clear"></div>
<div class="clear"></div>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy"><img class="builtBy" alt="Built by Maven" src="../../images/logos/maven-feather.png" /></a>
</div>
</div>
</div>
<div id="bodyColumn" class="span10" >
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<h1>Metron Profiler for Spark</h1>
<p><a name="Metron_Profiler_for_Spark"></a></p>
<p>This project allows profiles to be executed using <a class="externalLink" href="https://spark.apache.org">Apache Spark</a>. This is a port of the Profiler to Spark that allows you to backfill profiles using archived telemetry.</p>
<ul>
<li><a href="#Introduction">Introduction</a></li>
<li><a href="#Getting_Started">Getting Started</a></li>
<li><a href="#Installation">Installation</a></li>
<li><a href="#Running_the_Profiler">Running the Profiler</a></li>
<li><a href="#Configuring_the_Profiler">Configuring the Profiler</a></li>
</ul>
<div class="section">
<h2><a name="Introduction"></a>Introduction</h2>
<p>Using the <a href="../metron-profiler-storm/index.html">Streaming Profiler</a> in <a class="externalLink" href="http://storm.apache.org">Apache Storm</a> allows you to create profiles based on the stream of telemetry being captured, enriched, triaged, and indexed by Metron. This does not allow you to create a profile based on telemetry that was captured in the past.</p>
<p>There are many cases where you might want to produce a profile from telemetry in the past. This is referred to as profile seeding or backfilling.</p>
<ul>
<li>
<p>As a Security Data Scientist, I want to understand the historical behaviors and trends of a profile so that I can determine if the profile has predictive value for model building.</p>
</li>
<li>
<p>As a Security Platform Engineer, I want to generate a profile using archived telemetry when I deploy a new model to production so that models depending on that profile can function on day 1.</p>
</li>
</ul>
<p>The Batch Profiler running in <a class="externalLink" href="https://spark.apache.org">Apache Spark</a> allows you to seed a profile using archived telemetry.</p>
<p>The portion of a profile produced by the Batch Profiler should be indistinguishable from the portion created by the Streaming Profiler. Consumers of the profile should not care how the profile was generated. Using the Streaming Profiler together with the Batch Profiler allows you to create a complete profile over a wide range of time.</p>
<p>For an introduction to the Profiler, see the <a href="../metron-profiler-common/index.html">Profiler README</a>.</p></div>
<div class="section">
<h2><a name="Getting_Started"></a>Getting Started</h2>
<ol style="list-style-type: decimal">
<li>
<p>Create a profile definition by editing <tt>$METRON_HOME/config/zookeeper/profiler.json</tt> as follows.</p>
<div>
<div>
<pre class="source">cat $METRON_HOME/config/zookeeper/profiler.json
{
&quot;profiles&quot;: [
{
&quot;profile&quot;: &quot;hello-world&quot;,
&quot;foreach&quot;: &quot;'global'&quot;,
&quot;init&quot;: { &quot;count&quot;: &quot;0&quot; },
&quot;update&quot;: { &quot;count&quot;: &quot;count + 1&quot; },
&quot;result&quot;: &quot;count&quot;
}
],
&quot;timestampField&quot;: &quot;timestamp&quot;
}
</pre></div></div>
</li>
<li>
<p>Ensure that you have archived telemetry available for the Batch Profiler to consume. By default, Metron will store this in HDFS at <tt>/apps/metron/indexing/indexed/*/*</tt>.</p>
<div>
<div>
<pre class="source">hdfs dfs -cat /apps/metron/indexing/indexed/*/* | wc -l
</pre></div></div>
</li>
<li>
<p>Copy the <tt>hbase-site.xml</tt> file from <tt>/etc/hbase/conf</tt> to <tt>/etc/spark2/conf</tt>. It is advisable to create a symlink rather than a copy; this avoids duplicating the file and keeps both locations consistent when the configuration is updated.</p>
<div>
<div>
<pre class="source">ln -s /etc/hbase/conf/hbase-site.xml /etc/spark2/conf/hbase-site.xml
</pre></div></div>
</li>
<li>
<p>Review the Batch Profiler&#x2019;s properties located at <tt>$METRON_HOME/config/batch-profiler.properties</tt>. See <a href="#Configuring_the_Profiler">Configuring the Profiler</a> for more information on these properties.</p>
</li>
<li>
<p>You may want to edit the log4j properties file that sits in the config directory under <tt>${SPARK_HOME}</tt>, or create one if it does not exist. It may be helpful to turn on <tt>DEBUG</tt> logging for the Profiler by adding the following line.</p>
<div>
<div>
<pre class="source"> log4j.logger.org.apache.metron.profiler.spark=DEBUG
</pre></div></div>
</li>
<li>
<p>Run the Batch Profiler.</p>
<div>
<div>
<pre class="source">source /etc/default/metron
cd $METRON_HOME
$METRON_HOME/bin/start_batch_profiler.sh
</pre></div></div>
</li>
<li>
<p>Query for the profile data using the <a href="../metron-profiler-client/index.html">Profiler Client</a>.</p>
</li>
</ol></div>
<div class="section">
<h2><a name="Installation"></a>Installation</h2>
<p>The Batch Profiler package is installed automatically when installing Metron using the Ambari MPack. See the following notes when installing the Batch Profiler without the Ambari MPack.</p>
<div class="section">
<h3><a name="Prerequisites"></a>Prerequisites</h3>
<p>The Batch Profiler requires Spark version 2.3.0+.</p>
<div class="section">
<h4><a name="Build_the_RPM"></a>Build the RPM</h4>
<ol style="list-style-type: decimal">
<li>
<p>Build Metron.</p>
<div>
<div>
<pre class="source">mvn clean package -DskipTests -T2C
</pre></div></div>
</li>
<li>
<p>Build the RPMs.</p>
<div>
<div>
<pre class="source">cd metron-deployment/
mvn clean package -Pbuild-rpms
</pre></div></div>
</li>
<li>
<p>Retrieve the package.</p>
<div>
<div>
<pre class="source">find ./ -name &quot;metron-profiler-spark*.rpm&quot;
</pre></div></div>
</li>
</ol></div>
<div class="section">
<h4><a name="Build_the_DEB"></a>Build the DEB</h4>
<ol style="list-style-type: decimal">
<li>
<p>Build Metron.</p>
<div>
<div>
<pre class="source">mvn clean package -DskipTests -T2C
</pre></div></div>
</li>
<li>
<p>Build the DEBs.</p>
<div>
<div>
<pre class="source">cd metron-deployment/
mvn clean package -Pbuild-debs
</pre></div></div>
</li>
<li>
<p>Retrieve the package.</p>
<div>
<div>
<pre class="source">find ./ -name &quot;metron-profiler-spark*.deb&quot;
</pre></div></div>
</li>
</ol></div></div></div>
<div class="section">
<h2><a name="Running_the_Profiler"></a>Running the Profiler</h2>
<ul>
<li><a href="#Usage">Usage</a></li>
<li><a href="#Advanced_Usage">Advanced Usage</a></li>
<li><a href="#Spark_Execution">Spark Execution</a></li>
<li><a href="#Kerberos">Kerberos</a></li>
<li><a href="#Input_Formats">Input Formats</a></li>
</ul>
<div class="section">
<h3><a name="Usage"></a>Usage</h3>
<p>A script located at <tt>$METRON_HOME/bin/start_batch_profiler.sh</tt> has been provided to simplify running the Batch Profiler. This script makes the following assumptions.</p>
<ul>
<li>
<p>The script builds the profiles defined in <tt>$METRON_HOME/config/zookeeper/profiler.json</tt>.</p>
</li>
<li>
<p>The properties defined in <tt>$METRON_HOME/config/batch-profiler.properties</tt> are passed to both the Profiler and Spark. You can define both Spark and Profiler properties in this same file.</p>
</li>
<li>
<p>The script assumes that Spark is installed at <tt>/usr/hdp/current/spark2-client</tt>. This can be overridden if you define an environment variable called <tt>SPARK_HOME</tt> prior to executing the script.</p>
</li>
</ul></div>
<div class="section">
<h3><a name="Advanced_Usage"></a>Advanced Usage</h3>
<p>The Batch Profiler may also be started using <tt>spark-submit</tt> as follows. See the Spark Documentation for more information about <a class="externalLink" href="https://spark.apache.org/docs/latest/submitting-applications.html#launching-applications-with-spark-submit"><tt>spark-submit</tt></a>.</p>
<div>
<div>
<pre class="source">${SPARK_HOME}/bin/spark-submit \
--class org.apache.metron.profiler.spark.cli.BatchProfilerCLI \
--properties-file ${SPARK_PROPS_FILE} \
${METRON_HOME}/lib/metron-profiler-spark-*.jar \
--config ${PROFILER_PROPS_FILE} \
--profiles ${PROFILES_FILE}
</pre></div></div>
<p>The Batch Profiler accepts the following arguments when run from the command line as shown above. All arguments following the Profiler jar are passed to the Profiler. All arguments preceding the Profiler jar are passed to Spark.</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th> Argument </th>
<th> Description</th></tr>
</thead><tbody>
<tr class="b">
<td> <a href="#a--profiles"><tt>-p</tt>, <tt>--profiles</tt></a> </td>
<td> Path to the profile definitions.</td></tr>
<tr class="a">
<td> <a href="#a--config"><tt>-c</tt>, <tt>--config</tt></a> </td>
<td> Path to the profiler properties file.</td></tr>
<tr class="b">
<td> <a href="#a--globals"><tt>-g</tt>, <tt>--globals</tt></a> </td>
<td> Path to the Stellar global config file.</td></tr>
<tr class="a">
<td> <a href="#a--reader"><tt>-r</tt>, <tt>--reader</tt></a> </td>
<td> Path to properties for the DataFrameReader.</td></tr>
<tr class="b">
<td> <tt>-h</tt>, <tt>--help</tt> </td>
<td> Print the help text.</td></tr>
</tbody>
</table>
<div class="section">
<h4><a name="a--profiles"></a><tt>--profiles</tt></h4>
<p>The path to a file containing the profile definition in JSON.</p></div>
<div class="section">
<h4><a name="a--config"></a><tt>--config</tt></h4>
<p>The path to a file containing key-value properties for the Profiler. This file would contain the properties described under <a href="#Configuring_the_Profiler">Configuring the Profiler</a>.</p></div>
<div class="section">
<h4><a name="a--globals"></a><tt>--globals</tt></h4>
<p>The path to a file containing key-value properties that define the global properties. This can be used to customize how certain Stellar functions behave during execution.</p></div>
<div class="section">
<h4><a name="a--reader"></a><tt>--reader</tt></h4>
<p>The path to a file containing key-value properties that are passed to the DataFrameReader when reading the input telemetry. This allows additional customization for how the input telemetry is read.</p></div></div>
<div class="section">
<h3><a name="Spark_Execution"></a>Spark Execution</h3>
<p>Spark supports a number of different <a class="externalLink" href="https://spark.apache.org/docs/latest/cluster-overview.html#cluster-manager-types">cluster managers</a>. The underlying cluster manager is transparent to the Profiler. To run the Profiler on a particular cluster manager, it is just a matter of setting the appropriate options as defined in the Spark documentation.</p>
<div class="section">
<h4><a name="Local_Mode"></a>Local Mode</h4>
<p>By default, the Batch Profiler instructs Spark to run in local mode. This will run all of the Spark execution components within a single JVM. This mode is only useful for testing with a limited set of data.</p>
<p><tt>$METRON_HOME/config/batch-profiler.properties</tt></p>
<div>
<div>
<pre class="source">spark.master=local
</pre></div></div>
</div>
<div class="section">
<h4><a name="Spark_on_YARN"></a>Spark on YARN</h4>
<p>To run the Profiler using <a class="externalLink" href="https://spark.apache.org/docs/latest/running-on-yarn.html#running-spark-on-yarn">Spark on YARN</a>, at a minimum edit the value of <tt>spark.master</tt> as shown. In many cases it also makes sense to set the YARN <a class="externalLink" href="https://spark.apache.org/docs/latest/running-on-yarn.html#launching-spark-on-yarn">deploy mode</a> to <tt>cluster</tt>.</p>
<p><tt>$METRON_HOME/config/batch-profiler.properties</tt></p>
<div>
<div>
<pre class="source">spark.master=yarn
spark.submit.deployMode=cluster
</pre></div></div>
<p>See the Spark documentation for information on how to further control the execution of Spark on YARN. Any of <a class="externalLink" href="https://spark.apache.org/docs/latest/running-on-yarn.html#spark-properties">these properties</a> can be added to the Profiler properties file.</p>
<p>The following command can be useful to review the logs generated when the Profiler is executed on YARN.</p>
<div>
<div>
<pre class="source">yarn logs -applicationId &lt;application-id&gt;
</pre></div></div>
</div></div>
<div class="section">
<h3><a name="Kerberos"></a>Kerberos</h3>
<p>See the Spark documentation for information on running the Batch Profiler in a <a class="externalLink" href="https://spark.apache.org/docs/latest/running-on-yarn.html#running-in-a-secure-cluster">secure, kerberized cluster</a>.</p></div>
<div class="section">
<h3><a name="Input_Formats"></a>Input Formats</h3>
<p>The Profiler can consume archived telemetry stored in a variety of input formats. By default, it is configured to consume the text/json that Metron archives in HDFS. This is often not the best format for archiving telemetry. If you choose a different format, you should be able to configure the Profiler to consume it by doing the following.</p>
<ol style="list-style-type: decimal">
<li>Edit <a href="#profiler.batch.input.format"><tt>profiler.batch.input.format</tt></a> and <a href="#profiler.batch.input.path"><tt>profiler.batch.input.path</tt></a> as needed. For example, to read ORC you might do the following.
<p><tt>$METRON_HOME/config/batch-profiler.properties</tt></p>
<div>
<div>
<pre class="source">profiler.batch.input.format=org.apache.spark.sql.execution.datasources.orc
profiler.batch.input.path=hdfs://localhost:9000/apps/metron/indexing/orc/*/*
</pre></div></div>
</li>
<li>If additional options are required for your input format, then use the <a href="#a--reader"><tt>--reader</tt></a> command-line argument when launching the Batch Profiler as <a href="#Advanced_Usage">described here</a>.</li>
</ol>
<div class="section">
<h4><a name="Common_Formats"></a>Common Formats</h4>
<p>The following examples highlight the configuration values needed to read telemetry stored in common formats. These values should be defined in the Profiler properties (see <a href="#a--config"><tt>--config</tt></a>).</p>
<div class="section">
<h5><a name="JSON"></a>JSON</h5>
<div>
<div>
<pre class="source">profiler.batch.input.reader=json
profiler.batch.input.path=/path/to/json/
</pre></div></div>
</div>
<div class="section">
<h5><a name="Apache_ORC"></a><a class="externalLink" href="https://orc.apache.org/">Apache ORC</a></h5>
<div>
<div>
<pre class="source">profiler.batch.input.reader=orc
profiler.batch.input.path=/path/to/orc/
</pre></div></div>
</div>
<div class="section">
<h5><a name="Apache_Parquet"></a><a class="externalLink" href="http://parquet.apache.org/">Apache Parquet</a></h5>
<div>
<div>
<pre class="source">profiler.batch.input.reader=parquet
profiler.batch.input.path=/path/to/parquet/
</pre></div></div>
</div></div></div></div>
<div class="section">
<h2><a name="Configuring_the_Profiler"></a>Configuring the Profiler</h2>
<p>By default, the configuration for the Batch Profiler is stored in the local filesystem at <tt>$METRON_HOME/config/batch-profiler.properties</tt>.</p>
<p>You can store settings for both the Profiler and Spark in this same file. Spark will only read settings that start with <tt>spark.</tt>.</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th> Setting </th>
<th> Description</th></tr>
</thead><tbody>
<tr class="b">
<td> <a href="#profiler.batch.input.path"><tt>profiler.batch.input.path</tt></a> </td>
<td> The path to the input data read by the Batch Profiler.</td></tr>
<tr class="a">
<td> <a href="#profiler.batch.input.reader"><tt>profiler.batch.input.reader</tt></a> </td>
<td> The telemetry reader used to read the input data.</td></tr>
<tr class="b">
<td> <a href="#profiler.batch.input.format"><tt>profiler.batch.input.format</tt></a> </td>
<td> The format of the input data read by the Batch Profiler.</td></tr>
<tr class="a">
<td> <a href="#profiler.batch.input.begin"><tt>profiler.batch.input.begin</tt></a> </td>
<td> Only messages with a timestamp after this will be profiled.</td></tr>
<tr class="b">
<td> <a href="#profiler.batch.input.end"><tt>profiler.batch.input.end</tt></a> </td>
<td> Only messages with a timestamp before this will be profiled.</td></tr>
<tr class="a">
<td> <a href="#profiler.period.duration"><tt>profiler.period.duration</tt></a> </td>
<td> The duration of each profile period.</td></tr>
<tr class="b">
<td> <a href="#profiler.period.duration.units"><tt>profiler.period.duration.units</tt></a> </td>
<td> The units used to specify the <a href="#profiler.period.duration"><tt>profiler.period.duration</tt></a>.</td></tr>
<tr class="a">
<td> <a href="#profiler.hbase.salt.divisor"><tt>profiler.hbase.salt.divisor</tt></a> </td>
<td> A salt is prepended to the row key to help prevent hot-spotting.</td></tr>
<tr class="b">
<td> <a href="#profiler.hbase.table"><tt>profiler.hbase.table</tt></a> </td>
<td> The name of the HBase table that profiles are written to.</td></tr>
<tr class="a">
<td> <a href="#profiler.hbase.column.family"><tt>profiler.hbase.column.family</tt></a> </td>
<td> The column family used to store profiles.</td></tr>
</tbody>
</table>
<div class="section">
<h3><a name="profiler.batch.input.path"></a><tt>profiler.batch.input.path</tt></h3>
<p><i>Default</i>: <tt>hdfs://localhost:9000/apps/metron/indexing/indexed/*/*</tt></p>
<p>The path to the input data read by the Batch Profiler.</p></div>
<div class="section">
<h3><a name="profiler.batch.input.reader"></a><tt>profiler.batch.input.reader</tt></h3>
<p><i>Default</i>: json</p>
<p>Defines how the input data is treated when read. The value is not case sensitive so <tt>JSON</tt> and <tt>json</tt> are equivalent.</p>
<ul>
<li><tt>json</tt>: Read text/json formatted telemetry</li>
<li><tt>orc</tt>: Read <a class="externalLink" href="https://orc.apache.org/">Apache ORC</a> formatted telemetry</li>
<li><tt>parquet</tt>: Read <a class="externalLink" href="http://parquet.apache.org/">Apache Parquet</a> formatted telemetry</li>
<li><tt>text</tt>: Consumes input data stored as raw text. Should be defined along with <a href="#profiler.batch.input.format"><tt>profiler.batch.input.format</tt></a>. Only use this if the input format is not already directly supported (as <tt>json</tt> is).</li>
<li><tt>columnar</tt>: Consumes input data stored in columnar formats. Should be defined along with <a href="#profiler.batch.input.format"><tt>profiler.batch.input.format</tt></a>. Only use this if the input format is not already directly supported (as <tt>json</tt> is).</li>
</ul>
<p>See <a href="#Common_Formats">Common Formats</a> for further information.</p></div>
<div class="section">
<h3><a name="profiler.batch.input.format"></a><tt>profiler.batch.input.format</tt></h3>
<p><i>Default</i>: text</p>
<p>The format of the input data read by the Batch Profiler. This is optional and not required in most cases. For example, this property is not required when <a href="#profiler.batch.input.reader"><tt>profiler.batch.input.reader</tt></a> is <tt>json</tt>, <tt>orc</tt>, or <tt>parquet</tt>.</p></div>
<div class="section">
<h3><a name="profiler.batch.input.begin"></a><tt>profiler.batch.input.begin</tt></h3>
<p><i>Default</i>: undefined; no time constraint</p>
<p>Only messages with a timestamp equal to or after this will be profiled. The Profiler will only profile messages with a timestamp in [<tt>profiler.batch.input.begin</tt>, <tt>profiler.batch.input.end</tt>] inclusive.</p>
<p>By default, no time constraint is defined. The value is expected to follow the <a class="externalLink" href="https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html#ISO_INSTANT">ISO-8601 instant format</a>; 2011-12-03T10:15:30Z.</p></div>
<div class="section">
<h3><a name="profiler.batch.input.end"></a><tt>profiler.batch.input.end</tt></h3>
<p><i>Default</i>: undefined; no time constraint</p>
<p>Only messages with a timestamp before or equal to this will be profiled. The Profiler will only profile messages with a timestamp in [<tt>profiler.batch.input.begin</tt>, <tt>profiler.batch.input.end</tt>] inclusive.</p>
<p>By default, no time constraint is defined. The value is expected to follow the <a class="externalLink" href="https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html#ISO_INSTANT">ISO-8601 instant format</a>; 2011-12-03T10:15:30Z.</p></div>
<div class="section">
<h3><a name="profiler.period.duration"></a><tt>profiler.period.duration</tt></h3>
<p><i>Default</i>: 15</p>
<p>The duration of each profile period. This value should be defined along with <a href="#profiler.period.duration.units"><tt>profiler.period.duration.units</tt></a>.</p>
<p><i>Important</i>: To read a profile using the <a href="../metron-profiler-client/index.html">Profiler Client</a>, the Profiler Client&#x2019;s <tt>profiler.client.period.duration</tt> property must match this value. Otherwise, the Profiler Client will be unable to read the profile data.</p></div>
<div class="section">
<h3><a name="profiler.period.duration.units"></a><tt>profiler.period.duration.units</tt></h3>
<p><i>Default</i>: MINUTES</p>
<p>The units used to specify the <tt>profiler.period.duration</tt>. This value should be defined along with <a href="#profiler.period.duration"><tt>profiler.period.duration</tt></a>.</p>
<p><i>Important</i>: To read a profile using the Profiler Client, the Profiler Client&#x2019;s <tt>profiler.client.period.duration.units</tt> property must match this value. Otherwise, the <a href="../metron-profiler-client/index.html">Profiler Client</a> will be unable to read the profile data.</p></div>
<div class="section">
<h3><a name="profiler.hbase.salt.divisor"></a><tt>profiler.hbase.salt.divisor</tt></h3>
<p><i>Default</i>: 1000</p>
<p>A salt is prepended to the row key to help prevent hot-spotting. This constant is used to generate the salt. This constant should be roughly equal to the number of nodes in the HBase cluster to ensure even distribution of data.</p></div>
<div class="section">
<h3><a name="profiler.hbase.table"></a><tt>profiler.hbase.table</tt></h3>
<p><i>Default</i>: profiler</p>
<p>The name of the HBase table that profile data is written to. The Profiler expects that the table exists and is writable. It will not create the table.</p></div>
<div class="section">
<h3><a name="profiler.hbase.column.family"></a><tt>profiler.hbase.column.family</tt></h3>
<p><i>Default</i>: P</p>
<p>The column family used to store profile data in HBase.</p></div></div>
</div>
</div>
</div>
<hr/>
<footer>
<div class="container-fluid">
<div class="row-fluid">
© 2015-2016 The Apache Software Foundation. Apache Metron, Metron, Apache, the Apache feather logo,
and the Apache Metron project logo are trademarks of The Apache Software Foundation.
</div>
</div>
</footer>
</body>
</html>