blob: 007319e872241cfe12a3b1bebea4d8744b913a4f [file] [log] [blame]
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="author" content="Apache Software Foundation">
<link rel="shortcut icon" href="../../img/favicon.ico">
<title>Monitoring - Apache Gobblin</title>
<link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" href="../../css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" />
<link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css">
<link href="../../css/extra.css" rel="stylesheet">
<script>
// Current page data
var mkdocs_page_name = "Monitoring";
var mkdocs_page_input_path = "user-guide/Monitoring.md";
var mkdocs_page_url = null;
</script>
<script src="../../js/jquery-2.1.1.min.js" defer></script>
<script src="../../js/modernizr-2.8.3.min.js" defer></script>
<script src="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
<script>hljs.initHighlightingOnLoad();</script>
</head>
<body class="wy-body-for-nav" role="document">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
<div class="wy-side-nav-search">
<a href="../.." class="icon icon-home"> Apache Gobblin</a>
<div role="search">
<form id ="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" title="Type search term here" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1">
<a class="" href="/">Home</a>
</li>
<li class="toctree-l1">
<a class="" href="../../Powered-By/">Companies Powered By Gobblin</a>
</li>
<li class="toctree-l1">
<a class="" href="../../Getting-Started/">Getting Started</a>
</li>
<li class="toctree-l1">
<a class="" href="../../Gobblin-Architecture/">Architecture</a>
</li>
<li class="toctree-l1">
<span class="caption-text">User Guide</span>
<ul class="subnav">
<li class="">
<a class="" href="../Working-with-Job-Configuration-Files/">Job Configuration Files</a>
</li>
<li class="">
<a class="" href="../Gobblin-Deployment/">Deployment</a>
</li>
<li class="">
<a class="" href="../Gobblin-as-a-Library/">Gobblin as a Library</a>
</li>
<li class="">
<a class="" href="../Gobblin-CLI/">Gobblin CLI</a>
</li>
<li class="">
<a class="" href="../Gobblin-Compliance/">Gobblin Compliance</a>
</li>
<li class="">
<a class="" href="../Gobblin-on-Yarn/">Gobblin on Yarn</a>
</li>
<li class="">
<a class="" href="../Compaction/">Compaction</a>
</li>
<li class="">
<a class="" href="../State-Management-and-Watermarks/">State Management and Watermarks</a>
</li>
<li class="">
<a class="" href="../Working-with-the-ForkOperator/">Fork Operator</a>
</li>
<li class="">
<a class="" href="../Configuration-Properties-Glossary/">Configuration Glossary</a>
</li>
<li class="">
<a class="" href="../Source-schema-and-Converters/">Source schema and Converters</a>
</li>
<li class="">
<a class="" href="../Partitioned-Writers/">Partitioned Writers</a>
</li>
<li class=" current">
<a class="current" href="./">Monitoring</a>
<ul class="subnav">
<li class="toctree-l3"><a href="#table-of-contents">Table of Contents</a></li>
<li class="toctree-l3"><a href="#overview">Overview</a></li>
<li class="toctree-l3"><a href="#metrics-collecting-and-reporting">Metrics Collecting and Reporting</a></li>
<li class="toctree-l3"><a href="#metrics-reporting">Metrics Reporting</a></li>
<li class="toctree-l3"><a href="#metrics-collection">Metrics collection</a></li>
<ul>
<li><a class="toctree-l4" href="#jvm-metrics">JVM Metrics</a></li>
<li><a class="toctree-l4" href="#pre-defined-job-execution-metrics">Pre-defined Job Execution Metrics</a></li>
</ul>
<li class="toctree-l3"><a href="#job-execution-history-store">Job Execution History Store</a></li>
<li class="toctree-l3"><a href="#email-notifications">Email Notifications</a></li>
</ul>
</li>
<li class="">
<a class="" href="../Gobblin-template/">Template</a>
</li>
<li class="">
<a class="" href="../Gobblin-Schedulers/">Schedulers</a>
</li>
<li class="">
<a class="" href="../Job-Execution-History-Store/">Job Execution History Store</a>
</li>
<li class="">
<a class="" href="../Building-Gobblin/">Building Gobblin</a>
</li>
<li class="">
<a class="" href="../Gobblin-genericLoad/">Generic Configuration Loading</a>
</li>
<li class="">
<a class="" href="../Hive-Registration/">Hive Registration</a>
</li>
<li class="">
<a class="" href="../Config-Management/">Config Management</a>
</li>
<li class="">
<a class="" href="../Docker-Integration/">Docker Integration</a>
</li>
<li class="">
<a class="" href="../Troubleshooting/">Troubleshooting</a>
</li>
<li class="">
<a class="" href="../FAQs/">FAQs</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<span class="caption-text">Sources</span>
<ul class="subnav">
<li class="">
<a class="" href="../../sources/AvroFileSource/">Avro files</a>
</li>
<li class="">
<a class="" href="../../sources/CopySource/">File copy</a>
</li>
<li class="">
<a class="" href="../../sources/QueryBasedSource/">Query based</a>
</li>
<li class="">
<a class="" href="../../sources/RestApiSource/">Rest Api</a>
</li>
<li class="">
<a class="" href="../../sources/GoogleAnalyticsSource/">Google Analytics</a>
</li>
<li class="">
<a class="" href="../../sources/GoogleDriveSource/">Google Drive</a>
</li>
<li class="">
<a class="" href="../../sources/GoogleWebmaster/">Google Webmaster</a>
</li>
<li class="">
<a class="" href="../../sources/HadoopTextInputSource/">Hadoop Text Input</a>
</li>
<li class="">
<a class="" href="../../sources/HelloWorldSource/">Hello World</a>
</li>
<li class="">
<a class="" href="../../sources/HiveAvroToOrcSource/">Hive Avro-to-ORC</a>
</li>
<li class="">
<a class="" href="../../sources/HivePurgerSource/">Hive compliance purging</a>
</li>
<li class="">
<a class="" href="../../sources/SimpleJsonSource/">JSON</a>
</li>
<li class="">
<a class="" href="../../sources/KafkaSource/">Kafka</a>
</li>
<li class="">
<a class="" href="../../sources/MySQLSource/">MySQL</a>
</li>
<li class="">
<a class="" href="../../sources/OracleSource/">Oracle</a>
</li>
<li class="">
<a class="" href="../../sources/SalesforceSource/">Salesforce</a>
</li>
<li class="">
<a class="" href="../../sources/SftpSource/">SFTP</a>
</li>
<li class="">
<a class="" href="../../sources/SqlServerSource/">SQL Server</a>
</li>
<li class="">
<a class="" href="../../sources/TeradataSource/">Teradata</a>
</li>
<li class="">
<a class="" href="../../sources/WikipediaSource/">Wikipedia</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<span class="caption-text">Sinks (Writers)</span>
<ul class="subnav">
<li class="">
<a class="" href="../../sinks/AvroHdfsDataWriter/">Avro HDFS</a>
</li>
<li class="">
<a class="" href="../../sinks/ParquetHdfsDataWriter/">Parquet HDFS</a>
</li>
<li class="">
<a class="" href="../../sinks/SimpleBytesWriter/">HDFS Byte array</a>
</li>
<li class="">
<a class="" href="../../sinks/ConsoleWriter/">Console</a>
</li>
<li class="">
<a class="" href="../../sinks/CouchbaseWriter/">Couchbase</a>
</li>
<li class="">
<a class="" href="../../sinks/Http/">HTTP</a>
</li>
<li class="">
<a class="" href="../../sinks/Gobblin-JDBC-Writer/">JDBC</a>
</li>
<li class="">
<a class="" href="../../sinks/Kafka/">Kafka</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<span class="caption-text">Gobblin Adaptors</span>
<ul class="subnav">
<li class="">
<a class="" href="../../adaptors/Gobblin-Distcp/">Gobblin Distcp</a>
</li>
<li class="">
<a class="" href="../../adaptors/Hive-Avro-To-ORC-Converter/">Hive Avro-To-Orc Converter</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<span class="caption-text">Case Studies</span>
<ul class="subnav">
<li class="">
<a class="" href="../../case-studies/Kafka-HDFS-Ingestion/">Kafka-HDFS Ingestion</a>
</li>
<li class="">
<a class="" href="../../case-studies/Publishing-Data-to-S3/">Publishing Data to S3</a>
</li>
<li class="">
<a class="" href="../../case-studies/Writing-ORC-Data/">Writing ORC Data</a>
</li>
<li class="">
<a class="" href="../../case-studies/Hive-Distcp/">Hive Distcp</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<span class="caption-text">Gobblin Data Management</span>
<ul class="subnav">
<li class="">
<a class="" href="../../data-management/Gobblin-Retention/">Retention</a>
</li>
<li class="">
<a class="" href="../../data-management/DistcpNgEvents/">Distcp-NG events</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<span class="caption-text">Gobblin Metrics</span>
<ul class="subnav">
<li class="">
<a class="" href="../../metrics/Gobblin-Metrics/">Quick Start</a>
</li>
<li class="">
<a class="" href="../../metrics/Existing-Reporters/">Existing Reporters</a>
</li>
<li class="">
<a class="" href="../../metrics/Metrics-for-Gobblin-ETL/">Metrics for Gobblin ETL</a>
</li>
<li class="">
<a class="" href="../../metrics/Gobblin-Metrics-Architecture/">Gobblin Metrics Architecture</a>
</li>
<li class="">
<a class="" href="../../metrics/Implementing-New-Reporters/">Implementing New Reporters</a>
</li>
<li class="">
<a class="" href="../../metrics/Gobblin-Metrics-Performance/">Gobblin Metrics Performance</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<span class="caption-text">Developer Guide</span>
<ul class="subnav">
<li class="">
<a class="" href="../../developer-guide/Customization-for-New-Source/">Customization for New Source</a>
</li>
<li class="">
<a class="" href="../../developer-guide/Customization-for-Converter-and-Operator/">Customization for Converter and Operator</a>
</li>
<li class="">
<a class="" href="../../developer-guide/CodingStyle/">Code Style Guide</a>
</li>
<li class="">
<a class="" href="../../developer-guide/Gobblin-Compliance-Design/">Gobblin Compliance Design</a>
</li>
<li class="">
<a class="" href="../../developer-guide/IDE-setup/">IDE setup</a>
</li>
<li class="">
<a class="" href="../../developer-guide/Monitoring-Design/">Monitoring Design</a>
</li>
<li class="">
<a class="" href="../../developer-guide/Documentation-Architecture/">Documentation Architecture</a>
</li>
<li class="">
<a class="" href="../../developer-guide/Contributing/">Contributing</a>
</li>
<li class="">
<a class="" href="../../developer-guide/GobblinModules/">Gobblin Modules</a>
</li>
<li class="">
<a class="" href="../../developer-guide/HighLevelConsumer/">High Level Consumer</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<span class="caption-text">Project</span>
<ul class="subnav">
<li class="">
<a class="" href="../../project/Feature-List/">Feature List</a>
</li>
<li class="">
<a class="" href="/people">Contributors and Team</a>
</li>
<li class="">
<a class="" href="../../project/Talks-and-Tech-Blogs/">Talks and Tech Blog Posts</a>
</li>
<li class="">
<a class="" href="../../project/Posts/">Posts</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<span class="caption-text">Miscellaneous</span>
<ul class="subnav">
<li class="">
<a class="" href="../../miscellaneous/Camus-to-Gobblin-Migration/">Camus to Gobblin Migration</a>
</li>
<li class="">
<a class="" href="../../miscellaneous/Exactly-Once-Support/">Exactly Once Support</a>
</li>
</ul>
</li>
</ul>
</div>
&nbsp;
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../..">Apache Gobblin</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../..">Docs</a> &raquo;</li>
<li>User Guide &raquo;</li>
<li>Monitoring</li>
<li class="wy-breadcrumbs-aside">
<a href="https://github.com/apache/incubator-gobblin/edit/master/docs/user-guide/Monitoring.md" rel="nofollow"> Edit on Gobblin</a>
</li>
</ul>
<hr/>
</div>
<div role="main">
<div class="section">
<h2 id="table-of-contents">Table of Contents</h2>
<div class="toc">
<ul>
<li><a href="#table-of-contents">Table of Contents</a></li>
<li><a href="#overview">Overview</a></li>
<li><a href="#metrics-collecting-and-reporting">Metrics Collecting and Reporting</a></li>
<li><a href="#metrics-reporting">Metrics Reporting</a></li>
<li><a href="#metrics-collection">Metrics collection</a><ul>
<li><a href="#jvm-metrics">JVM Metrics</a></li>
<li><a href="#pre-defined-job-execution-metrics">Pre-defined Job Execution Metrics</a></li>
</ul>
</li>
<li><a href="#job-execution-history-store">Job Execution History Store</a></li>
<li><a href="#email-notifications">Email Notifications</a></li>
</ul>
</div>
<h2 id="overview">Overview</h2>
<p>Gobblin is a framework for ingesting potentially huge volumes of data from many different sources, so it's critical to monitor the health and status of the system and job executions. Gobblin employs a variety of approaches, introduced below, for this purpose. All of the approaches are optional and can be turned on and off in different combinations through the framework and job configurations. </p>
<h2 id="metrics-collecting-and-reporting">Metrics Collecting and Reporting</h2>
<h2 id="metrics-reporting">Metrics Reporting</h2>
<p>Out-of-the-box, Gobblin reports metrics through:</p>
<ul>
<li><em>JMX</em> : used in the standalone deployment. Metrics reported to JMX can be checked using tools such as <a href="http://visualvm.java.net/" rel="nofollow">VisualVM</a> or JConsole. </li>
<li><em>Metric log files</em>: Files are stored in a root directory defined by the property <code>metrics.log.dir</code>. Each Gobblin job has its own subdirectory under the root directory and each run of the job has its own metric log file named after the job ID as <code>${job_id}.metrics.log</code>.</li>
<li><em>Hadoop counters</em> : used for M/R deployments. Gobblin-specific metrics are reported in the "JOB" or "TASK" groups for job- and task-level metrics, respectively. By default, task-level metrics are not reported through Hadoop counters as doing so may cause the number of Hadoop counters to go beyond the system-wide limit. However, users can choose to turn on reporting task-level metrics as Hadoop counters by setting <code>mr.include.task.counters=true</code>. </li>
</ul>
<h2 id="metrics-collection">Metrics collection</h2>
<h3 id="jvm-metrics">JVM Metrics</h3>
<p>The standalone deployment of Gobblin runs in a single JVM so it's important to monitor the health of the JVM, through a set of pre-defined JVM metrics in the following four categories. </p>
<ul>
<li><code>jvm.gc</code>: this covers metrics related to garbage collection, e.g., counts and time spent on garbage collection.</li>
<li><code>jvm.memory</code>: this covers metrics related to memory usage, e.g., detailed heap usage. </li>
<li><code>jvm.threads</code>: this covers metrics related to thread states, e.g., thread count and thread deadlocks.</li>
<li><code>jvm.fileDescriptorRatio</code>: this measures the ratio of open file descriptors.</li>
</ul>
<p>All JVM metrics are reported via JMX and can be checked using tools such as <a href="http://visualvm.java.net/" rel="nofollow">VisualVM</a> or JConsole. </p>
<h3 id="pre-defined-job-execution-metrics">Pre-defined Job Execution Metrics</h3>
<p>Internally, Gobblin pre-defines a minimum set of metrics listed below in two metric groups: <code>JOB</code> and <code>TASK</code> for job-level metrics and task-level metrics, respectively. Those metrics are useful in keeping track of the progress and performance of job executions.</p>
<ul>
<li><code>${metric_group}.${id}.records</code>: this metric keeps track of the total number of data records extracted by the job or task depending on the <code>${metric_group}</code>. The <code>${id}</code> is either a job ID or a task ID depending on the <code>${metric_group}</code>. </li>
<li><code>${metric_group}.${id}.recordsPerSecond</code>: this metric keeps track of the rate of data extraction as data records extracted per second by the job or task depending on the <code>${metric_group}</code>.</li>
<li><code>${metric_group}.${id}.bytes</code>: this metric keeps track of the total number of bytes extracted by the job or task depending on the <code>${metric_group}</code>.</li>
<li><code>${metric_group}.${id}.bytesPerSecond</code>: this metric keeps track of the rate of data extraction as bytes extracted per second by the job or task depending on the <code>${metric_group}</code>.</li>
</ul>
<p>Among the above metrics, <code>${metric_group}.${id}.records</code> and <code>${metric_group}.${id}.bytes</code> are reported as Hadoop MapReduce counters for Gobblin jobs running on Hadoop.</p>
<h2 id="job-execution-history-store">Job Execution History Store</h2>
<p>Gobblin also supports writing job execution information to a job execution history store backed by a database of choice. Gobblin uses MySQL by default and it ships with the SQL <a href="https://github.com/apache/incubator-gobblin/tree/master/gobblin-metastore/src/main/resources/db/migration" rel="nofollow">DDLs</a> of the relevant MySQL tables, although it still allows users to choose which database to use as long as the schema of the tables is compatible. Users can use the properties <code>job.history.store.url</code> and <code>job.history.store.jdbc.driver</code> to specify the database URL and the JDBC driver to work with the database of choice. The user name and password used to access the database can be specified using the properties <code>job.history.store.user</code> and <code>job.history.store.password</code>. An example configuration is shown below:</p>
<pre><code>job.history.store.url=jdbc:mysql://localhost/gobblin
job.history.store.jdbc.driver=com.mysql.jdbc.Driver
job.history.store.user=gobblin
job.history.store.password=gobblin
</code></pre>
<h2 id="email-notifications">Email Notifications</h2>
<p>In addition to writing job execution information to the job execution history store, Gobblin also supports sending email notifications about job status. Job status notifications fall into two categories: alerts in case of job failures and normal notifications in case of successful job completions. Users can choose to enable or disable both categories using the properties <code>email.alert.enabled</code> and <code>email.notification.enabled</code>. </p>
<p>The main content of an email alert or notification is a job status report in JSON format. Below is an example job status report:</p>
<pre><code>{
&quot;job name&quot;: &quot;Gobblin_Demo_Job&quot;,
&quot;job id&quot;: &quot;job_Gobblin_Demo_Job_1417487480842&quot;,
&quot;job state&quot;: &quot;COMMITTED&quot;,
&quot;start time&quot;: 1417487480874,
&quot;end time&quot;: 1417490858913,
&quot;duration&quot;: 3378039,
&quot;tasks&quot;: 1,
&quot;completed tasks&quot;: 1,
&quot;task states&quot;: [
{
&quot;task id&quot;: &quot;task_Gobblin_Demo_Job_1417487480842_0&quot;,
&quot;task state&quot;: &quot;COMMITTED&quot;,
&quot;start time&quot;: 1417490795903,
&quot;end time&quot;: 1417490858908,
&quot;duration&quot;: 63005,
&quot;high watermark&quot;: -1,
&quot;exception&quot;: &quot;&quot;
}
]
}
</code></pre>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="../Gobblin-template/" class="btn btn-neutral float-right" title="Template">Next <span class="icon icon-circle-arrow-right"></span></a>
<a href="../Partitioned-Writers/" class="btn btn-neutral" title="Partitioned Writers"><span class="icon icon-circle-arrow-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<!-- Copyright etc -->
</div>
Built with <a href="http://www.mkdocs.org" rel="nofollow">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme" rel="nofollow">theme</a> provided by <a href="https://readthedocs.org" rel="nofollow">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<div class="rst-versions" role="note" style="cursor: pointer">
<span class="rst-current-version" data-toggle="rst-current-version">
<span><a href="../Partitioned-Writers/" style="color: #fcfcfc;">&laquo; Previous</a></span>
<span style="margin-left: 15px"><a href="../Gobblin-template/" style="color: #fcfcfc">Next &raquo;</a></span>
</span>
</div>
<script>var base_url = '../..';</script>
<script src="../../js/theme.js" defer></script>
<script src="../../js/extra.js" defer></script>
<script src="../../search/main.js" defer></script>
</body>
</html>