blob: ebb0a235b065e45c5126b30edf1d4c0c0b34692e [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<title>Eagle - Use Cases</title>
<meta name="description" content="Eagle - Analyze Big Data Platforms for Security and Performance">
<meta name="keywords" content="Eagle, Hadoop, Security, Real Time">
<meta name="author" content="eBay Inc.">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="/css/animate.css">
<link rel="stylesheet" href="/css/bootstrap.min.css">
<link rel="stylesheet" href="/css/font-awesome.min.css">
<link rel="stylesheet" href="/css/misc.css">
<link rel="stylesheet" href="/css/style.css">
<link rel="stylesheet" href="/css/styles.css">
<link rel="stylesheet" href="/css/main.css">
<link rel="alternate" type="application/rss+xml" title="Eagle" href="http://goeagle.io/feed.xml" />
<link rel="shortcut icon" href="/images/favicon.png">
<!-- Baidu Analytics Tracking-->
<script>
// Baidu analytics loader.
// Keep any existing global `_hmt` value; otherwise start with an empty array
// (presumably the queue that hm.js reads — not visible from this page).
var _hmt = _hmt || [];
(function() {
  // Build the <script> element that pulls in the tracker for this site id.
  var tracker = document.createElement("script");
  // Explicit https: a protocol-relative URL would inherit the page's scheme,
  // loading the tracker over plain HTTP on http pages and failing on file://.
  tracker.src = "https://hm.baidu.com/hm.js?fedc55df2ea52777a679192e8f849ece";
  // Insert the tracker ahead of the first <script> already in the document.
  var firstScript = document.getElementsByTagName("script")[0];
  firstScript.parentNode.insertBefore(tracker, firstScript);
})();
</script>
<!-- Google Analytics Tracking -->
<script>
// Google Analytics (analytics.js) bootstrap.
// Call arguments: i = window, s = document, o = 'script', g = library URL, r = 'ga'.
// `a` and `m` are not passed; they are declared as parameters to serve as locals.
(function(i,s,o,g,r,a,m){
  // Record the name of the global command function on window.
  i['GoogleAnalyticsObject']=r;
  // Unless window.ga already exists, define it as a stub that appends each
  // call's arguments to ga.q.
  i[r]=i[r]||function(){
    (i[r].q=i[r].q||[]).push(arguments)
  };
  // Numeric timestamp of when this snippet ran.
  i[r].l=1*new Date();
  // Create the async <script> element and insert it before the first <script> on the page.
  a=s.createElement(o);
  m=s.getElementsByTagName(o)[0];
  a.async=1;
  a.src=g;
  m.parentNode.insertBefore(a,m);
  // Explicit https: a protocol-relative URL would inherit the page's scheme,
  // loading the library over plain HTTP on http pages and failing on file://.
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Issue the 'create' command for this property id, then send a pageview.
ga('create', 'UA-68929805-1', 'auto');
ga('send', 'pageview');
</script>
</head>
<body>
<!-- header start -->
<div id="home_page">
<div class="topbar">
<div class="container">
<div class="row" >
<nav class="navbar navbar-default">
<div class="container-fluid">
<!-- Brand and toggle get grouped for better mobile display -->
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> <span class="sr-only">Toggle navigation</span> <span class="icon-bar"></span> <span class="icon-bar"></span> <span class="icon-bar"></span> </button>
<a class="navbar-brand" href="/"><img src="/images/logo2.png" alt="Apache Eagle" height="44" style="margin-top:-7px"></a> </div>
<!-- Collect the nav links, forms, and other content for toggling -->
<!-- <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav navbar-right" id="top-menu">
<li><a class="menu" href="/#home_page">HOME</a></li>
<li><a class="menu" href="/docs/">DOCS</a></li>
<li><a class="menu" href="/#about_page">ABOUT</a></li>
<li><a class="menu" href="/#diagram_page">ARCHITECTURE</a></li>
<li><a class="menu" href="/#modules_page">MODULES</a></li>
<li><a class="menu" href="/#usecase_page">USE CASES</a></li>
<li>
</li>
</ul> -->
</div>
</div>
<!-- /.container-fluid -->
</nav>
</div>
</div>
</div>
<div class="headerimage">
<div class="flexslider">
<ul class="slides">
<li><img src="/images/slider/4.jpg" alt="Slide 1"></li>
</ul>
</div>
</div>
<div class="particles" style="height:40%"> </div><!---particles-->
<div class="slider-caption" style="top:80px;">
<div class="homewrapper">
<div class="hometitle">
<a href="/">
<img src="/images/feather.png" alt="Apache Eagle home" height="80">
</a>
</div>
<div class="hometext">
<h2>Analyze Big Data Platforms For Security and Performance</h2>
<div class="social-buttons">
<a href="https://github.com/apache/eagle"><i class="fa fa-github"></i></a>
<a href="http://twitter.com/TheApacheEagle"><i class="fa fa-twitter"></i></a>
<a href="https://www.facebook.com/TheApacheEagle/"><i class="fa fa-facebook"></i></a>
<a href="#"><i class="fa fa-weixin"></i></a>
<!-- <a href="https://www.weibo.com/ApacheEagle/"><i class="fa fa-weibo"></i></a> -->
</div>
</div>
</div>
</div>
</div>
<!-- header end -->
<div class="container-fluid page-content">
<div class="row">
<div class="col-md-10 col-md-offset-1">
<!-- sidebar -->
<div class="col-xs-6 col-sm-3" id="sidebar" role="navigation">
<ul class="nav" id="adminnav">
<li class="heading">Getting Started</li>
<li class="sidenavli "><a href="/docs/index.html" data-permalink="/docs/usecases.html" id="">Introduction</a></li>
<li class="sidenavli current"><a href="/docs/usecases.html" data-permalink="/docs/usecases.html" id="">Use Cases</a></li>
<li class="sidenavli "><a href="/docs/terminology.html" data-permalink="/docs/usecases.html" id="">Terminology</a></li>
<li class="sidenavli "><a href="/docs/ecosystem.html" data-permalink="/docs/usecases.html" id="">Ecosystem</a></li>
<li class="sidenavli "><a href="/docs/community.html" data-permalink="/docs/usecases.html" id="">Community</a></li>
<li class="sidenavli "><a href="/docs/FAQ.html" data-permalink="/docs/usecases.html" id="">FAQ</a></li>
<li class="divider"></li>
<li class="heading">Documentation</li>
<li class="sidenavli "><a href="/docs/latest/" data-permalink="/docs/usecases.html" id="">Latest version (v0.5.0)</a></li>
<li class="divider"></li>
<li class="heading">Download</li>
<li class="sidenavli "><a href="/docs/download-latest.html" data-permalink="/docs/usecases.html" id="">Latest version (v0.5.0)</a></li>
<li class="sidenavli "><a href="/docs/download.html" data-permalink="/docs/usecases.html" id="">Archived</a></li>
<li class="divider"></li>
<li class="heading">Supplement</li>
<li class="sidenavli "><a href="/docs/security.html" data-permalink="/docs/usecases.html" id="">Security</a></li>
<li class="divider"></li>
<li class="sidenavli">
<a href="mailto:dev@eagle.apache.org" target="_blank">Need Help?</a>
</li>
</ul>
</div>
<div class="col-xs-6 col-sm-9 page-main-content" style="margin-left: -15px" id="loadcontent">
<h1 class="page-header" style="margin-top: 0px">Use Cases</h1>
<h3 id="data-activity-monitoring">Data Activity Monitoring</h3>
<ul>
<li>
<p>Data activity represents how users explore the data provided by big data platforms. Analyzing data activity and alerting on insecure access are fundamental requirements for securing enterprise data. As data volume is increasing exponentially with Hadoop<sup id="fnref:HADOOP"><a href="#fn:HADOOP" class="footnote">1</a></sup>, Hive<sup id="fnref:HIVE"><a href="#fn:HIVE" class="footnote">2</a></sup>, Spark<sup id="fnref:SPARK"><a href="#fn:SPARK" class="footnote">3</a></sup> technology, understanding data activities for every user becomes extremely hard, let alone alerting on a single malicious event in real time among petabytes of streaming data per day.</p>
</li>
<li>
<p>Securing enterprise data starts with understanding data activities for every user. Apache Eagle (called Eagle in the following) has integrated with many popular big data platforms, e.g. Hadoop, Hive, Spark, Cassandra<sup id="fnref:CASSANDRA"><a href="#fn:CASSANDRA" class="footnote">4</a></sup> etc. With Eagle, users can browse the data hierarchy, mark sensitive data and then create comprehensive policies to alert on insecure data access.</p>
</li>
</ul>
<h3 id="job-performance-analytics">Job Performance Analytics</h3>
<ul>
<li>
<p>Running map/reduce jobs is the most popular way to analyze data in a Hadoop system. Analyzing job performance and providing tuning suggestions are critical for Hadoop system stability, job SLA, resource usage, etc.</p>
</li>
<li>
<p>Eagle analyzes job performance with two complementary approaches. First, Eagle periodically takes snapshots of all running jobs with the YARN API; second, Eagle continuously reads job lifecycle events immediately after each job is completed. With the two approaches, Eagle can analyze a single job’s trend, data skew problems, failure reasons, etc. More interestingly, Eagle can analyze the whole Hadoop cluster’s performance by taking all jobs into account.</p>
</li>
</ul>
<h3 id="node-anomaly-detection">Node Anomaly Detection</h3>
<ul>
<li>
<p>One of the practical benefits of analyzing map/reduce jobs is node anomaly detection. A big data platform like Hadoop may involve thousands of nodes supporting multi-tenant jobs. One bad node may not crash the whole cluster, thanks to the fault-tolerant design, but it may affect specific jobs, cause a lot of rescheduling and job delays, and hurt the stability of the whole cluster.</p>
</li>
<li>
<p>Eagle developed an out-of-the-box algorithm to compare the task failure ratio of each node in a large cluster. If one node continues to fail running tasks, it may have potential issues; for example, one of its disks may be full or failing. In a nutshell, if one node behaves very differently from all other nodes within one large cluster, this node is anomalous and we should take action.</p>
</li>
</ul>
<h3 id="cluster-performance-analytics">Cluster Performance Analytics</h3>
<ul>
<li>
<p>It is critical to understand why a cluster performs badly. Is it because of some crazy jobs recently onboarded, a huge number of tiny files, or degrading namenode performance?</p>
</li>
<li>
<p>Eagle calculates per-minute resource usage from individual jobs in real time, e.g. CPU, memory, HDFS IO bytes, HDFS IO numOps etc., and also collects namenode JMX metrics. Correlating them helps system administrators easily find the root cause of cluster slowness.</p>
</li>
</ul>
<h3 id="cluster-resource-usage-trend">Cluster Resource Usage Trend</h3>
<ul>
<li>
<p>YARN manages resource allocation through queues in a large Hadoop cluster. Cluster resource usage is exactly reflected by overall queue usage.</p>
</li>
<li>
<p>Eagle collects queue statistics in real time and provides insights into cluster resource usage.</p>
</li>
</ul>
<hr />
<h4 id="footnotes"><em>Footnotes</em></h4>
<div class="footnotes">
<ol>
<li id="fn:HADOOP">
<p><em>All mentions of “hadoop” on this page represent Apache Hadoop.</em>&nbsp;<a href="#fnref:HADOOP" class="reversefootnote">&#8617;</a></p>
</li>
<li id="fn:HIVE">
<p><em>All mentions of “hive” on this page represent Apache Hive.</em>&nbsp;<a href="#fnref:HIVE" class="reversefootnote">&#8617;</a></p>
</li>
<li id="fn:SPARK">
<p><em>All mentions of “spark” on this page represent Apache Spark.</em>&nbsp;<a href="#fnref:SPARK" class="reversefootnote">&#8617;</a></p>
</li>
<li id="fn:CASSANDRA">
<p><em>Apache Cassandra.</em>&nbsp;<a href="#fnref:CASSANDRA" class="reversefootnote">&#8617;</a></p>
</li>
</ol>
</div>
</div><!--end of loadcontent-->
</div>
<!--end of centered content-->
</div>
</div>
<!--end of container-->
<!-- footer start -->
<div class="footerwrapper">
<div class="container">
<div class="row">
<div class="col-md-12"><div style="margin-left:auto; margin-right:auto; text-align:center;font-size: 12px">
<div>
</div>
<div>
<a href="http://www.apache.org">
<img id="asf-logo" alt="Apache Software Foundation" src="/images/apache-logo-small.gif">
</a>
</div>
<div>
Copyright © 2015 <a href="http://www.apache.org">The Apache Software Foundation</a>, Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
</div>
<div>
Apache Eagle, Eagle, Apache Hadoop, Hadoop, Apache HBase, HBase, Apache Hive, Hive, Apache Ambari, Ambari, Apache Spark, Spark, Apache Kafka, Kafka, Apache Storm, Storm, Apache Maven, Maven, Apache Tomcat, Tomcat, Apache Derby, Derby, Apache Cassandra, Cassandra, Apache ZooKeeper, ZooKeeper, Apache, the Apache feather logo, and the Apache project logo are trademarks of The Apache Software Foundation.
</div>
</div></div>
</div>
</div>
</div>
<!-- footer end -->
<!-- JavaScripts -->
<script src="/js/jquery-1.11.1.min.js"></script>
<script src="/js/jquery.singlePageNav.js"></script>
<script src="/js/jquery.flexslider.js"></script>
<script src="/js/modernizr.min.js"></script>
<script src="/js/svg.js"></script>
<script>
/************** FlexSlider *********************/
// Initialise FlexSlider once on every .flexslider element, using the "fade"
// animation with direction navigation turned off.
$('.flexslider').flexslider({
  animation: "fade",
  directionNav: false
});
</script>
</body>
</html>