blob: a3a0fbc7f4caccfcd568b8f80d81f643462b4ba7 [file] [log] [blame]
<!doctype html>
<!--[if lt IE 7]><html lang="en-US" class="no-js lt-ie9 lt-ie8 lt-ie7"><![endif]-->
<!--[if (IE 7)&!(IEMobile)]><html lang="en-US" class="no-js lt-ie9 lt-ie8"><![endif]-->
<!--[if (IE 8)&!(IEMobile)]><html lang="en-US" class="no-js lt-ie9"><![endif]-->
<!--[if gt IE 8]><!-->
<html lang="en-US" class="no-js">
<!--<![endif]-->
<head>
  <!-- Encoding and legacy-IE rendering mode; kept first so they apply before any other content is parsed. -->
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <title>Apache Spot Product Architecture Overview - Apache Spot</title>

  <!-- Mobile browser hints. -->
  <meta name="HandheldFriendly" content="True">
  <meta name="MobileOptimized" content="320">
  <meta name="viewport" content="width=device-width, initial-scale=1"/>

  <!-- Icons and browser chrome colours. The .ico variant is served to IE only via the conditional comment. -->
  <link rel="apple-touch-icon" href="../../library/images/apple-touch-icon.png">
  <link rel="icon" href="../../favicon.png">
  <!--[if IE]>
  <link rel="shortcut icon" href="http://spot.incubator.apache.org/favicon.ico">
  <![endif]-->
  <meta name="msapplication-TileColor" content="#f01d4f">
  <meta name="msapplication-TileImage" content="../../library/images/win8-tile-icon.png">
  <meta name="theme-color" content="#121212">

  <!-- Resource hints and feed discovery. -->
  <link rel='dns-prefetch' href='//fonts.googleapis.com' />
  <link rel='dns-prefetch' href='//s.w.org' />
  <link rel="alternate" type="application/rss+xml" title="Apache Spot &raquo; Feed" href="../../feed/" />

  <!-- Stylesheets. The web font is requested over HTTPS so it is not blocked as mixed content when this page is served securely. -->
  <link rel='stylesheet' id='googleFonts-css' href='https://fonts.googleapis.com/css?family=Lato%3A400%2C700%2C400italic%2C700italic' type='text/css' media='all' />
  <link rel='stylesheet' id='bones-stylesheet-css' href='../../library/css/style.css' type='text/css' media='all' />
  <!--[if lt IE 9]>
  <link rel='stylesheet' id='bones-ie-only-css' href='http://spot.incubator.apache.org/library/css/ie.css' type='text/css' media='all' />
  <![endif]-->
  <link rel='stylesheet' id='mm-css-css' href='../../library/css/meanmenu.css' type='text/css' media='all' />

  <!-- Scripts. jQuery is loaded before jquery-migrate and jquery.meanmenu; keep this order (both presumably depend on the jQuery global; confirm before reordering). -->
  <script type='text/javascript' src='../../library/js/libs/modernizr.custom.min.js'></script>
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.1.1/jquery.min.js"></script>
  <script type='text/javascript' src='../../library/js/jquery-migrate.min.js'></script>
  <script type='text/javascript' src='../../library/js/jquery.meanmenu.js'></script>

  <!-- Google Analytics (analytics.js) loader: injects the script asynchronously, creates the tracker for property UA-87470508-1 and records one pageview. -->
  <script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-87470508-1', 'auto');
ga('send', 'pageview');
  </script>
</head>
<body class="single single-post">
<div id="container">
<header class="header">
  <!-- Site header: logo plus the primary navigation menu. -->
  <div id="inner-header" class="wrap cf">
    <p id="logo" class="h1" itemscope itemtype="http://schema.org/Organization">
      <a href="http://spot.incubator.apache.org/" rel="nofollow"><img src="../../library/images/logo.png" alt="Apache Spot" /></a>
    </p>
    <nav>
      <ul id="menu-main-menu" class="nav top-nav cf">
        <!-- Get Started (with sub-menu) -->
        <li id="menu-item-129" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-129">
          <a href="../../get-started">Get Started</a>
          <ul class="sub-menu">
            <li><a href="../../get-started">Get Started</a></li>
            <li><a href="../../get-started/supporting-apache">Supporting Apache</a></li>
            <li><a href="../../get-started/environment">Environment</a></li>
            <li><a href="../../get-started/architecture">Architecture</a></li>
            <li><a href="../../get-started/demo">Demo</a></li>
          </ul>
        </li>
        <!-- Download -->
        <li id="menu-item-5" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-5">
          <a href="../../download">Download</a>
        </li>
        <!-- Community (with grouped sub-menu). External links open in a new tab and carry rel="noopener" so the opened page cannot reach back through window.opener. -->
        <li id="menu-item-130" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-130">
          <a href="../../community">Community</a>
          <ul class="sub-menu com-sm">
            <li class="dropmenu-head">Get in Touch</li>
            <li><a href="../../community" class="mail">Mailing Lists</a></li>
            <li class="divider"></li>
            <li><a href="../../community/committers">Project Committers</a></li>
            <li><a href="../../community/contribute">How to Contribute</a></li>
            <li class="divider"></li>
            <li class="dropmenu-head">Developer Resources</li>
            <li><a href="https://github.com/apache/incubator-spot" target="_blank" rel="noopener" class="github">Github</a></li>
            <li><a href="https://issues.apache.org/jira/browse/SPOT/" target="_blank" rel="noopener" class="jira">JIRA Issue Tracker</a></li>
            <!-- The query-string separator is written as &amp; so the attribute value contains no ambiguous ampersand. -->
            <li><a href="https://cwiki.apache.org/confluence/pages/viewpage.action?spaceKey=SPOT&amp;title=Apache+Spot+%28Incubating%29+Home" target="_blank" rel="noopener" class="">Confluence Site</a></li>
            <li class="divider"></li>
            <li class="dropmenu-head">Social Media</li>
            <li><a href="https://twitter.com/ApacheSpot" target="_blank" rel="noopener" class="twitter-icon">Twitter</a></li>
          </ul>
        </li>
        <!-- Documentation -->
        <li id="menu-item-106" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-106">
          <a href="../../doc">Documentation</a>
        </li>
        <!-- Project Components (with sub-menu); the top-level entry has no page of its own. -->
        <li class="menu-item menu-item-has-children">
          <a href="#">Project Components</a>
          <ul class="sub-menu">
            <li><a href="../../project-components/ingestion">Ingestion</a></li>
            <li><a href="../../project-components/machine-learning">Machine Learning</a></li>
            <li><a href="../../project-components/suspicious-connects-analysis">Suspicious Connects Analysis</a></li>
            <li><a href="../../project-components/visualization">Visualization</a></li>
            <li class="under-dev">Under Development</li>
            <li><a href="../../project-components/open-data-models">Open Data Models</a></li>
          </ul>
        </li>
        <!-- Blog: marked active because this page is a blog post. -->
        <li id="menu-item-13" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-13 active">
          <a href="../../blog">Blog</a>
        </li>
      </ul>
    </nav>
  </div>
</header>
<div id="mobile-nav"></div>
<div id="content">
<div id="inner-content" class="wrap cf">
<main id="main" class="m-all t-2of3 d-5of7 cf" role="main" itemscope itemprop="mainContentOfPage" itemtype="http://schema.org/Blog">
<article class="cf post type-post status-publish format-standard hentry category-uncategorized" role="article" itemscope itemprop="blogPost" itemtype="http://schema.org/BlogPosting">
<header class="article-header entry-header">
  <!-- Post title and publication date. The byline holds only the <time> element; a stray closing </span> with no matching opening tag was removed. -->
  <h1 class="entry-title single-title" itemprop="headline" rel="bookmark">Apache Spot Product Architecture Overview</h1>
  <p class="byline entry-meta vcard">
    <time class="updated entry-time" datetime="2017-03-27" itemprop="datePublished">
      March 27, 2017
    </time>
  </p>
</header>
<section class="entry-content cf" itemprop="articleBody">
<img src="../../library/images/blog/2017/03/product-architecture-overview.png" alt="Apache Spot product architecture overview diagram" />
<h3>Data Sources</h3>
<p>Spot can directly collect netflow data, DNS data and/or proxy data. This data can also be collected from a SIEM or from a common logging server. Additional data types can be collected using the Open Data Model. Any number of data sources can be analyzed using Spot. Because most of these data sources represent a large volume of data, most organizations start with the data source that represents the area of highest risk.</p>
<h3>Data Storage</h3>
<p>Using nfdump for netflow data, TShark for DNS data and a parser for proxy data, the Spot collectors process the information that is sent. This data is ingested into Spot’s HDFS. Once Spot has received 3-4 hours of data, the analysis to detect suspicious connections using machine learning algorithms is performed.</p>
<h3>Data Analysis and Machine Learning</h3>
<p>The machine learning component of Spot contains routines for performing suspicious connections analyses on netflow, DNS or proxy logs gathered from a network. These analyses consume a collection of network events and produce a list of the events that are considered to be the least probable and most suspicious. They rely on the ingest component of Spot to collect and load netflow, DNS and proxy records.</p>
<p>Spot uses topic modeling to discover normal and abnormal behavior. It treats the collection of logs related to an IP as a document and uses Latent Dirichlet Allocation (LDA) to discover hidden semantic structures in the collection of such documents.</p>
<p>Spot infers a probabilistic model for the network behavior of each IP address. Each network log entry is assigned an estimated probability (score) by the model. The events with lower scores are flagged as “suspicious” for further analysis.</p>
<blockquote>LDA is a generative probabilistic model used for discrete data such as text corpora. LDA is a three-level Bayesian model in which each word of a document is generated from a mixture of an underlying set of topics [1]. We apply LDA to network traffic by converting network log entries into words through aggregation and discretization. In this manner, documents correspond to IP addresses, words to log entries (related to an IP address) and topics to profiles of common network activity.</blockquote>
<h3>Analytics</h3>
<p>Context is then added to the results generated by machine learning algorithms. The results are enriched with geolocation and threat reputation for each connection, accelerating the detection of compromise indicators.</p>
<h3>Visualization</h3>
<p>The top 300 suspicious results are sent to the Spot GUI to visualize. With the Spot GUI, the top suspicious network activity can be reviewed and the user can engage with data right in the browser. The Spot GUI can also be used to execute advanced search or create a storyboard of the security threats. It also takes advantage of the latest Web technologies to provide Web Components (ReactJS+Flux), an amazing user experience (Bootstrap + D3), data manipulation (IPython notebooks) and easy access to data using GraphQL.</p>
<h3>New in March 10, 2017 Release</h3>
<ul>
<li>Use GraphQL to query data from HDFS Parquet files instead of Local File System CSV files</li>
<li>Modify OA module to save data in HDFS instead of CSV files</li>
<li>Create API to get Spot data from HDFS using Impala</li>
<li>Modify ML component to read feedback directly from HDFS</li>
<li>Database schema change to store spot data</li>
</ul>
<h3>Coming in Future Releases</h3>
<p>In the next releases, Apache Spot will share IoCs (ranked suspicious results) with other security tools. Suspicious results scored as critical can be shared using McAfee Open DXL with the Open Security Controller and/or McAfee ePO to adjust or tune your security policies in real time.</p>
<p class="small">[1] Blei, David M., Andrew Y. Ng, and Michael I. Jordan. “Latent Dirichlet Allocation.” Journal of Machine Learning Research 3, no. Jan (2003): 993-1022.</p>
</section>
<footer class="article-footer">
filed under: <a href="../../category/uncategorized/" rel="category tag">Uncategorized</a>
</footer>
</article>
</main>
<div id="sidebar1" class="sidebar m-all t-1of3 d-2of7 last-col cf" role="complementary">
  <!-- Widget: links to the most recent blog posts. -->
  <div id="recent-posts-2" class="widget widget_recent_entries">
    <h4 class="widgettitle">Recent Posts</h4>
    <ul>
      <li>
        <a href="../../blog/apache-spot-product-architecture-overview/">Apache Spot Product Architecture Overview</a>
      </li>
      <li>
        <a href="../../blog/strength-in-numbers-why-consider-open-source-cybersecurity-analytics/">Strength in Numbers: Why Consider Open Source Cybersecurity Analytics</a>
      </li>
      <li>
        <a href="../../blog/jupyter-notebooks-for-data-analysis/">Jupyter Notebooks for Data Analysis</a>
      </li>
      <li>
        <a href="../../blog/apache-spot-and-cybersecurity-using-netflows-to-detect-threats-to-critical-infrastructure/">Apache Spot (Incubating) and Cybersecurity — Using NetFlows to Detect Threats to Critical Infrastructure</a>
      </li>
      <li>
        <a href="../../blog/how-apache-spot-helps-create-well-stocked-data-lakes-and-catch-powerful-insights/">How Apache Spot (Incubating) Helps Create Well-Stocked Data Lakes and Catch Powerful Insights</a>
      </li>
      <li>
        <a href="../../blog/apache-spot-3-most-asked-questions/">Apache Spot (Incubating): 3 Most-Asked Questions</a>
      </li>
    </ul>
  </div>

  <!-- Widget: monthly archive links, newest first. -->
  <div id="archives-2" class="widget widget_archive">
    <h4 class="widgettitle">Archives</h4>
    <ul>
      <li>
        <a href="../../2017/03/">March 2017</a>
      </li>
      <li>
        <a href="../../2016/10/">October 2016</a>
      </li>
      <li>
        <a href="../../2016/09/">September 2016</a>
      </li>
      <li>
        <a href="../../2016/08/">August 2016</a>
      </li>
      <li>
        <a href="../../2016/03/">March 2016</a>
      </li>
    </ul>
  </div>
</div>
</div>
</div>
<footer class="footer" role="contentinfo" itemscope itemtype="http://schema.org/WPFooter">
  <!-- Site footer: a single centred copyright notice. -->
  <div id="inner-footer" class="wrap cf">
    <p class="source-org copyright" style="text-align:center;">
      &copy; 2020 Apache Spot.
    </p>
  </div>
</footer>
</div>
<a href="#0" class="cd-top">Top</a>
<script type='text/javascript' src='../../library/js/scripts.js'></script>
</body>
</html>