blob: b4815f653b814086baf3a3322b61704b968919fe [file] [log] [blame]
<!DOCTYPE html>
<!--
| Generated by Apache Maven Doxia at 2018-03-12
| Rendered using Apache Maven Fluido Skin 1.3.0
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="Date-Revision-yyyymmdd" content="20180312" />
<meta http-equiv="Content-Language" content="en" />
<title>Falcon - Operationalizing Falcon</title>
<link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
<link rel="stylesheet" href="./css/site.css" />
<link rel="stylesheet" href="./css/print.css" media="print" />
<script type="text/javascript" src="./js/apache-maven-fluido-1.3.0.min.js"></script>
<script type="text/javascript">$( document ).ready( function() { $( '.carousel' ).carousel( { interval: 3500 } ) } );</script>
</head>
<body class="topBarDisabled">
<div class="container-fluid">
<div id="banner">
<div class="pull-left">
<a href="../index.html" id="bannerLeft">
<img src="images/falcon-logo.png" alt="Apache Falcon" width="200" height="45"/>
</a>
</div>
<div class="pull-right"> <a href="http://www.apache.org" id="bannerRight">
<img src="images/apache-feather-tm.gif" alt="Apache Software Foundation" height="45"/>
</a>
</div>
<div class="clear"><hr/></div>
</div>
<div id="breadcrumbs">
<ul class="breadcrumb">
<li class="">
<a href="http://www.apache.org" class="externalLink" title="Apache">
Apache</a>
</li>
<li class="divider ">/</li>
<li class="">
<a href="index.html" title="Falcon">
Falcon</a>
</li>
<li class="divider ">/</li>
<li class="">Operationalizing Falcon</li>
<li id="publishDate" class="pull-right">Last Published: 2018-03-12</li>
</ul>
</div>
<div class="row-fluid">
<div id="leftColumn" class="span3">
<div class="well sidebar-nav">
<ul class="nav nav-list">
<li class="nav-header">Falcon</li>
<li>
<a href="index.html" title="About">
<i class="none"></i>
About</a>
</li>
<li>
<a href="slides/falcon-overview.html" title="Overview">
<i class="none"></i>
Overview</a>
</li>
<li>
<a href="slides/falcon-user-guide.html" title="User Guide">
<i class="none"></i>
User Guide</a>
</li>
<li>
<a href="GettingStarted.html" title="Getting Started">
<i class="none"></i>
Getting Started</a>
</li>
<li>
<a href="FalconDocumentation.html" title="Architecture">
<i class="none"></i>
Architecture</a>
</li>
<li>
<a href="InstallationSteps.html" title="Installation">
<i class="none"></i>
Installation</a>
</li>
<li>
<a href="OnBoarding.html" title="On Boarding">
<i class="none"></i>
On Boarding</a>
</li>
<li>
<a href="MigrationInstructions.html" title="Migrate to 0.10">
<i class="none"></i>
Migrate to 0.10</a>
</li>
<li class="active">
<a href="#"><i class="none"></i>Operability</a>
</li>
<li>
<a href="EntitySpecification.html" title="Entity Specification">
<i class="none"></i>
Entity Specification</a>
</li>
<li>
<a href="falconcli/FalconCLI.html" title="Client (Falcon CLI)">
<i class="none"></i>
Client (Falcon CLI)</a>
</li>
<li>
<a href="restapi/ResourceList.html" title="Rest API">
<i class="icon-chevron-right"></i>
Rest API</a>
</li>
<li>
<a href="HiveIntegration.html" title="Hive Integration">
<i class="none"></i>
Hive Integration</a>
</li>
<li>
<a href="Extensions.html" title="Server side Extensions">
<i class="none"></i>
Server side Extensions</a>
</li>
<li>
<a href="Security.html" title="Security">
<i class="none"></i>
Security</a>
</li>
<li class="nav-header">Project Information</li>
<li>
<a href="project-info.html" title="Summary">
<i class="none"></i>
Summary</a>
</li>
<li>
<a href="mail-lists.html" title="Mailing Lists">
<i class="none"></i>
Mailing Lists</a>
</li>
<li>
<a href="http://webchat.freenode.net?channels=apachefalcon&amp;uio=d4" class="externalLink" title="IRC">
<i class="none"></i>
IRC</a>
</li>
<li>
<a href="team-list.html" title="Team">
<i class="none"></i>
Team</a>
</li>
<li>
<a href="issue-tracking.html" title="Issue Tracking">
<i class="none"></i>
Issue Tracking</a>
</li>
<li>
<a href="source-repository.html" title="Source Repository">
<i class="none"></i>
Source Repository</a>
</li>
<li>
<a href="https://cwiki.apache.org/confluence/display/FALCON/Index" class="externalLink" title="Wiki">
<i class="none"></i>
Wiki</a>
</li>
<li>
<a href="license.html" title="License">
<i class="none"></i>
License</a>
</li>
<li>
<a href="https://cwiki.apache.org/confluence/display/FALCON/News" class="externalLink" title="News">
<i class="none"></i>
News</a>
</li>
<li>
<a href="https://cwiki.apache.org/confluence/display/FALCON/PoweredBy" class="externalLink" title="Powered by">
<i class="none"></i>
Powered by</a>
</li>
<li>
<a href="https://cwiki.apache.org/confluence/display/FALCON/Acknowledgements" class="externalLink" title="Acknowledgements">
<i class="none"></i>
Acknowledgements</a>
</li>
<li>
<a href="http://blogs.apache.org/falcon/" class="externalLink" title="Blog">
<i class="none"></i>
Blog</a>
</li>
<li class="nav-header">Releases</li>
<li>
<a href="http://www.apache.org/dyn/closer.lua/falcon/0.11" class="externalLink" title="0.11">
<i class="none"></i>
0.11</a>
</li>
<li>
<a href="http://www.apache.org/dyn/closer.lua/falcon/0.10" class="externalLink" title="0.10">
<i class="none"></i>
0.10</a>
</li>
<li>
<a href="http://www.apache.org/dyn/closer.lua/falcon/0.9" class="externalLink" title="0.9">
<i class="none"></i>
0.9</a>
</li>
<li>
<a href="http://www.apache.org/dyn/closer.lua/falcon/0.8" class="externalLink" title="0.8">
<i class="none"></i>
0.8</a>
</li>
<li>
<a href="http://www.apache.org/dyn/closer.lua/falcon/0.7" class="externalLink" title="0.7">
<i class="none"></i>
0.7</a>
</li>
<li>
<a href="http://archive.apache.org/dist/falcon/0.6.1" class="externalLink" title="0.6.1">
<i class="none"></i>
0.6.1</a>
</li>
<li>
<a href="http://archive.apache.org/dist/incubator/falcon/0.6-incubating" class="externalLink" title="0.6-incubating">
<i class="none"></i>
0.6-incubating</a>
</li>
<li>
<a href="http://archive.apache.org/dist/incubator/falcon/0.5-incubating" class="externalLink" title="0.5-incubating">
<i class="none"></i>
0.5-incubating</a>
</li>
<li>
<a href="http://archive.apache.org/dist/incubator/falcon/0.4-incubating" class="externalLink" title="0.4-incubating">
<i class="none"></i>
0.4-incubating</a>
</li>
<li>
<a href="http://archive.apache.org/dist/incubator/falcon/0.3-incubating" class="externalLink" title="0.3-incubating">
<i class="none"></i>
0.3-incubating</a>
</li>
<li>
<a href="https://cwiki.apache.org/confluence/display/FALCON/Roadmap" class="externalLink" title="Coming soon">
<i class="none"></i>
Coming soon</a>
</li>
<li class="nav-header">Documentation</li>
<li>
<a href="0.11/index.html" title="0.11 (Current)">
<i class="none"></i>
0.11 (Current)</a>
</li>
<li>
<a href="0.10/index.html" title="0.10">
<i class="none"></i>
0.10</a>
</li>
<li>
<a href="0.9/index.html" title="0.9">
<i class="none"></i>
0.9</a>
</li>
<li>
<a href="0.8/index.html" title="0.8">
<i class="none"></i>
0.8</a>
</li>
<li>
<a href="0.7/index.html" title="0.7">
<i class="none"></i>
0.7</a>
</li>
<li>
<a href="0.6.1/index.html" title="0.6.1">
<i class="none"></i>
0.6.1</a>
</li>
<li>
<a href="0.6-incubating/index.html" title="0.6-incubating">
<i class="none"></i>
0.6-incubating</a>
</li>
<li>
<a href="0.5-incubating/index.html" title="0.5-incubating">
<i class="none"></i>
0.5-incubating</a>
</li>
<li>
<a href="0.4-incubating/index.html" title="0.4-incubating">
<i class="none"></i>
0.4-incubating</a>
</li>
<li>
<a href="0.3-incubating/index.html" title="0.3-incubating">
<i class="none"></i>
0.3-incubating</a>
</li>
<li class="nav-header">ASF</li>
<li>
<a href="http://www.apache.org/foundation/how-it-works.html" class="externalLink" title="How Apache Works">
<i class="none"></i>
How Apache Works</a>
</li>
<li>
<a href="http://www.apache.org/foundation/" class="externalLink" title="Foundation">
<i class="none"></i>
Foundation</a>
</li>
<li>
<a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink" title="Sponsoring Apache">
<i class="none"></i>
Sponsoring Apache</a>
</li>
<li>
<a href="http://www.apache.org/foundation/thanks.html" class="externalLink" title="Thanks">
<i class="none"></i>
Thanks</a>
</li>
</ul>
<hr class="divider" />
<div id="poweredBy">
<div class="clear"></div>
<div class="clear"></div>
<div class="clear"></div>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img class="builtBy" alt="Built by Maven" src="./images/logos/maven-feather.png" />
</a>
</div>
</div>
</div>
<div id="bodyColumn" class="span9" >
<div class="section">
<h2>Operationalizing Falcon<a name="Operationalizing_Falcon"></a></h2></div>
<div class="section">
<h3>Overview<a name="Overview"></a></h3>
<p>Apache Falcon provides various tools to operationalize Falcon consisting of Alerts for unrecoverable errors, Audits of user actions, Metrics, and Notifications. They are detailed below.</p>
</div>
<div class="section">
<h3>Lineage<a name="Lineage"></a></h3>
<p>Currently, Lineage has no way to access or restore information about entity instances created while lineage was disabled. Information about entities, however, is preserved and bootstrapped when lineage is enabled. If you have to reset the graph db, you can delete the graph db files specified in startup.properties and restart Falcon. Please note: you will lose all information about the instances if you delete the graph db.</p></div>
<div class="section">
<h3>Monitoring<a name="Monitoring"></a></h3>
<p>Falcon provides monitoring of various events by capturing metrics of those events. The metric numbers can then be used to monitor performance and health of the Falcon system and the entire processing pipelines.</p>
<p>Falcon also exposes <a class="externalLink" href="https://github.com/thinkaurelius/titan/wiki/Titan-Performance-and-Monitoring">metrics for titandb</a>.</p>
<p>Users can view the logs of these events in the metric.log file; by default, this file is created under the ${user.dir}/logs/ directory. Users may also extend the Falcon monitoring framework to send events to systems like Mondemand/lwes by implementing the org.apache.falcon.plugin.MonitoringPlugin interface.</p>
<p>The following events are captured by Falcon for logging the metrics:</p>
<ol style="list-style-type: decimal">
<li>New cluster definitions posted to Falcon (success &amp; failures)</li>
<li>New feed definition posted to Falcon (success &amp; failures)</li>
<li>New process definition posted to Falcon (success &amp; failures)</li>
<li>Process update events (success &amp; failures)</li>
<li>Feed update events (success &amp; failures)</li>
<li>Cluster update events (success &amp; failures)</li>
<li>Process suspend events (success &amp; failures)</li>
<li>Feed suspend events (success &amp; failures)</li>
<li>Process resume events (success &amp; failures)</li>
<li>Feed resume events (success &amp; failures)</li>
<li>Process remove events (success &amp; failures)</li>
<li>Feed remove events (success &amp; failures)</li>
<li>Cluster remove events (success &amp; failures)</li>
<li>Process instance kill events (success &amp; failures)</li>
<li>Process instance re-run events (success &amp; failures)</li>
<li>Process instance generation events</li>
<li>Process instance failure events</li>
<li>Process instance auto-retry events</li>
<li>Process instance retry exhaust events</li>
<li>Feed instance deletion event</li>
<li>Feed instance deletion failure event (no retries)</li>
<li>Feed instance replication event</li>
<li>Feed instance replication failure event</li>
<li>Feed instance replication auto-retry event</li>
<li>Feed instance replication retry exhaust event</li>
<li>Feed instance late arrival event</li>
<li>Feed instance post cut-off arrival event</li>
<li>Process re-run due to late feed event</li>
<li>Transaction rollback failed event</li></ol>
<p>The metric logged for an event has the following properties:</p>
<ol style="list-style-type: decimal">
<li>Action - Name of the event.</li>
<li>Dimensions - A list of name/value pairs of various attributes for a given action.</li>
<li>Status - Status of an action (FAILED/SUCCEEDED).</li>
<li>Time-taken - Time taken in nanoseconds for a given action.</li></ol>
<p>An example for an event logged for a submit of a new process definition:</p>
<p>2012-05-04 12:23:34,026 {Action:submit, Dimensions:{entityType=process}, Status: SUCCEEDED, Time-taken:97087000 ns}</p>
<p>Users may parse the metric.log or capture these events from custom monitoring frameworks and can plot various graphs or send alerts according to their requirements.</p></div>
<div class="section">
<h3>Notifications<a name="Notifications"></a></h3>
<p>Falcon has two types of notifications - System and User notifications.</p></div>
<div class="section">
<h4>System notifications<a name="System_notifications"></a></h4>
<p>System notifications are internally generated and used by Falcon to monitor the Falcon-orchestrated workflow jobs. By default, Falcon starts an embedded ActiveMQ JMS server on the Falcon machine on port 61616 as a daemon. Alternatively, users can make Falcon use an existing JMS server instead of starting an embedded instance by performing the following two steps:</p>
<p></p>
<ul>
<li>Set the property broker.url in startup.properties as below</li></ul>
<div class="source">
<pre>
*.broker.url=tcp://jms-server-host:61616
</pre></div>
<p></p>
<ul>
<li>Set the system property falcon.embeddedmq to false as below</li></ul>
<div class="source">
<pre>
&lt;FALCON-INSTALL-DIR&gt;/bin/falcon-start -Dfalcon.embeddedmq=false
</pre></div>
<p>Falcon uses FALCON.ENTITY.TOPIC to publish system notifications. This topic and the Map Message fields are internal and could change between releases.</p></div>
<div class="section">
<h4>User notifications<a name="User_notifications"></a></h4>
<p>In addition to the FALCON.ENTITY.TOPIC, Falcon also creates a JMS topic for every process/feed that is scheduled in Falcon as part of User notifications. To enable User notifications, the broker url and implementation class of the JMS engine need to be specified in the cluster definition associated with the feed/process. Users may register consumers on the required topic to check the availability or status of feed instances. The User notification JMS broker instance can be the same as the System notification broker or a different one.</p>
<p>The name of the JMS topic is derived from the process/feed name: FALCON.&lt;feed-or-process-name&gt;. Falcon sends a map message to the JMS topic for every feed instance that is created/deleted/replicated/imported/exported. The JMS Map Message sent to a topic has the following fields:</p>
<p></p>
<ol style="list-style-type: decimal">
<li>cluster - name of the current cluster the feed/process is dependent on.</li>
<li>entityType - type of the entity (feed or process).</li>
<li>entityName - name of the entity.</li>
<li>nominalTime - instance time (or data date).</li>
<li>operation - operation like generate, delete, replicate, import, export.</li>
<li>feedNames - name of the feeds which are generated/replicated/deleted/imported/exported.</li>
<li>feedInstancePaths - comma separated feed instance paths.</li>
<li>workflowId - current workflow-id of the instance.</li>
<li>workflowUser - user who owns the feed instance (i.e. partition).</li>
<li>runId - current run-id of the instance.</li>
<li>status - status of the user workflow instance.</li>
<li>timeStamp - current timestamp.</li>
<li>logDir - log dir where lineage can be recorded.</li></ol>
<p>The JMS messages are automatically purged after a certain period (default 3 days) by the Falcon JMS house-keeping service. The TTL (time-to-live) for JMS messages can be configured in Falcon's startup.properties file.</p>
<p>The following example shows how to enable and read user notification by connecting to the JMS broker.</p>
<p>First, specify the JMS broker url in the cluster definition XML as shown below.</p>
<div class="source">
<pre>
&lt;?xml version=&quot;1.0&quot;?&gt;
&lt;!-- filename : primaryCluster.xml --&gt;
&lt;cluster colo=&quot;USWestOregon&quot; description=&quot;oregonHadoopCluster&quot; name=&quot;primaryCluster&quot; xmlns=&quot;uri:falcon:cluster:0.1&quot;&gt;
&lt;interfaces&gt;
...
...
&lt;interface type=&quot;messaging&quot; endpoint=&quot;tcp://user-jms-broker-host:61616?daemon=true&quot; version=&quot;5.1.6&quot; /&gt;
...
&lt;/interfaces&gt;
&lt;/cluster&gt;
</pre></div>
<p>Next, use a JMS consumer (example below in Java) to read the message from the topic with the name FALCON.&lt;feed-or-process-name&gt;</p>
<div class="source">
<pre>
import org.apache.activemq.ActiveMQConnectionFactory;
import org.apache.activemq.command.ActiveMQMapMessage;
import javax.jms.ConnectionFactory;
import javax.jms.Connection;
import javax.jms.MessageConsumer;
import javax.jms.Topic;
import javax.jms.Session;
import javax.jms.TopicSession;
public class FalconUserJMSClient {
public static void main(String[] args)throws Exception {
// Note: specify the JMS broker URL
String brokerUrl = &quot;tcp://localhost:61616&quot;;
ConnectionFactory connectionFactory = new ActiveMQConnectionFactory(brokerUrl);
Connection connection = connectionFactory.createConnection();
connection.setClientID(&quot;Falcon User JMS Consumer&quot;);
TopicSession session = (TopicSession) connection.createSession(false, Session.AUTO_ACKNOWLEDGE);
try {
// Note: the topic name for the feed will be FALCON.&lt;feed-name&gt;
Topic falconTopic = session.createTopic(&quot;FALCON.feed-sample&quot;);
MessageConsumer consumer = session.createConsumer(falconTopic);
connection.start();
while (true) {
ActiveMQMapMessage msg = (ActiveMQMapMessage) consumer.receive();
System.out.println(&quot;cluster : &quot; + msg.getString(&quot;cluster&quot;));
System.out.println(&quot;entityType : &quot; + msg.getString(&quot;entityType&quot;));
System.out.println(&quot;entityName : &quot; + msg.getString(&quot;entityName&quot;));
System.out.println(&quot;nominalTime : &quot; + msg.getString(&quot;nominalTime&quot;));
System.out.println(&quot;operation : &quot; + msg.getString(&quot;operation&quot;));
System.out.println(&quot;feedNames : &quot; + msg.getString(&quot;feedNames&quot;));
System.out.println(&quot;feedInstancePaths : &quot; + msg.getString(&quot;feedInstancePaths&quot;));
System.out.println(&quot;workflowId : &quot; + msg.getString(&quot;workflowId&quot;));
System.out.println(&quot;workflowUser : &quot; + msg.getString(&quot;workflowUser&quot;));
System.out.println(&quot;runId : &quot; + msg.getString(&quot;runId&quot;));
System.out.println(&quot;status : &quot; + msg.getString(&quot;status&quot;));
System.out.println(&quot;timeStamp : &quot; + msg.getString(&quot;timeStamp&quot;));
System.out.println(&quot;logDir : &quot; + msg.getString(&quot;logDir&quot;));
System.out.println(&quot;brokerUrl : &quot; + msg.getString(&quot;brokerUrl&quot;));
System.out.println(&quot;brokerImplClass : &quot; + msg.getString(&quot;brokerImplClass&quot;));
System.out.println(&quot;logFile : &quot; + msg.getString(&quot;logFile&quot;));
System.out.println(&quot;topicName : &quot; + msg.getString(&quot;topicName&quot;));
System.out.println(&quot;brokerTTL : &quot; + msg.getString(&quot;brokerTTL&quot;));
}
} finally {
if (session != null) {
session.close();
}
if (connection != null) {
connection.close();
}
}
}
}
</pre></div></div>
<div class="section">
<h3>Alerts<a name="Alerts"></a></h3>
<p>Falcon generates alerts for unrecoverable errors into a log file by default. Users can view these alerts in the alerts.log file; by default, this file is created under the ${user.dir}/logs/ directory.</p>
<p>Users may also extend the Falcon Alerting plugin to send events to systems like Nagios, etc. by implementing the org.apache.falcon.plugin.AlertingPlugin interface.</p></div>
<div class="section">
<h3>Audits<a name="Audits"></a></h3>
<p>Falcon audits all user activity and captures it into a log file by default. Users can view these audits in the audit.log file; by default, this file is created under the ${user.dir}/logs/ directory.</p>
<p>Users may also extend the Falcon Audit plugin to send audits to systems like Apache Argus, etc. by implementing the org.apache.falcon.plugin.AuditingPlugin interface.</p></div>
<div class="section">
<h3>Metrics Collection In Graphite<a name="Metrics_Collection_In_Graphite"></a></h3>
<p>Falcon supports sending metrics to Graphite. More details can be found in <a href="./GraphiteMetricCollection.html">Graphite Metric Collection</a>.</p></div>
</div>
</div>
</div>
<hr/>
<footer>
<div class="container-fluid">
<div class="row span12">Copyright &copy; 2013-2018
<a href="http://www.apache.org">Apache Software Foundation</a>.
All Rights Reserved.
</div>
</div>
</footer>
</body>
</html>