<!doctype html>

<!--[if lt IE 7]><html lang="en-US" class="no-js lt-ie9 lt-ie8 lt-ie7"><![endif]-->
<!--[if (IE 7)&!(IEMobile)]><html lang="en-US" class="no-js lt-ie9 lt-ie8"><![endif]-->
<!--[if (IE 8)&!(IEMobile)]><html lang="en-US" class="no-js lt-ie9"><![endif]-->
<!--[if gt IE 8]><!-->
<html lang="en-US" class="no-js">
    <!--<![endif]-->

    <head>
        <!-- Document encoding and legacy-IE rendering mode -->
        <meta charset="utf-8">

        <meta http-equiv="X-UA-Compatible" content="IE=edge">

        <title>How Apache Spot (Incubating) Helps Create Well-Stocked Data Lakes and Catch Powerful Insights - Apache Spot</title>

        <!-- Mobile rendering hints -->
        <meta name="HandheldFriendly" content="True">
        <meta name="MobileOptimized" content="320">
        <meta name="viewport" content="width=device-width, initial-scale=1"/>

        <!-- Icons and browser/OS theming -->
        <link rel="apple-touch-icon" href="../../library/images/apple-touch-icon.png">
        <link rel="icon" href="../../favicon.png">
        <!--[if IE]>
        <link rel="shortcut icon" href="http://spot.incubator.apache.org/favicon.ico">
        <![endif]-->
        <meta name="msapplication-TileColor" content="#f01d4f">
        <meta name="msapplication-TileImage" content="../../library/images/win8-tile-icon.png">
        <meta name="theme-color" content="#121212">

        <!-- Resource hints and feed discovery -->
        <link rel='dns-prefetch' href='//fonts.googleapis.com' />
        <link rel='dns-prefetch' href='//s.w.org' />
        <link rel="alternate" type="application/rss+xml" title="Apache Spot &raquo; Feed" href="../../feed/" />

        <!-- Stylesheets. The web font is requested over HTTPS so that it is not
             blocked as mixed content when this page itself is served over HTTPS. -->
        <link rel='stylesheet' id='googleFonts-css'  href='https://fonts.googleapis.com/css?family=Lato%3A400%2C700%2C400italic%2C700italic' type='text/css' media='all' />
        <link rel='stylesheet' id='bones-stylesheet-css'  href='../../library/css/style.css' type='text/css' media='all' />
        <!--[if lt IE 9]>
        <link rel='stylesheet' id='bones-ie-only-css'  href='http://spot.incubator.apache.org/library/css/ie.css' type='text/css' media='all' />
        <![endif]-->
        <link rel='stylesheet' id='mm-css-css'  href='../../library/css/meanmenu.css' type='text/css' media='all' />

        <!-- Scripts: Modernizr first, then jQuery, followed by the jQuery-dependent
             files (jquery-migrate, meanmenu), in that order. -->
        <script type='text/javascript' src='../../library/js/libs/modernizr.custom.min.js'></script>
        <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.1.1/jquery.min.js"></script>
        <script type='text/javascript' src='../../library/js/jquery-migrate.min.js'></script>
        <script type='text/javascript' src='../../library/js/jquery.meanmenu.js'></script>

		<script>
		  // Google Analytics (analytics.js) bootstrap snippet.
		  // The IIFE below:
		  //   - records the name 'ga' in window.GoogleAnalyticsObject;
		  //   - defines window.ga (unless already defined) as a stub that pushes
		  //     its arguments onto the queue ga.q;
		  //   - stores the current time (as a number) in ga.l;
		  //   - creates a <script> element with async set and src pointing at
		  //     analytics.js, and inserts it before the first <script> in the page.
		  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
		  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
		  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
		  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

		  // Issue the 'create' command for ID UA-87470508-1 (with 'auto'),
		  // then the 'send' 'pageview' command.
		  ga('create', 'UA-87470508-1', 'auto');
		  ga('send', 'pageview');

		</script>
    </head>

    <body class="single single-post">

        <div id="container">
            <!-- Site header: logo plus the primary navigation menu -->
            <header class="header">

                <div id="inner-header" class="wrap cf">

                    <p id="logo" class="h1" itemscope itemtype="http://schema.org/Organization">
                        <a href="http://spot.incubator.apache.org/" rel="nofollow"><img src="../../library/images/logo.png" alt="Apache Spot" /></a>
                    </p>

                    <nav>
                        <ul id="menu-main-menu" class="nav top-nav cf">
                            <!-- Get Started -->
                            <li id="menu-item-129" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-129">
                                <a href="../../get-started">Get Started</a>
                                <ul class="sub-menu">
                                    <li><a href="../../get-started">Get Started</a></li>
                                    <li><a href="../../get-started/supporting-apache">Supporting Apache</a></li>
                                    <li><a href="../../get-started/environment">Environment</a></li>
                                    <li><a href="../../get-started/architecture">Architecture</a></li>
                                    <li><a href="../../get-started/demo">Demo</a></li>
                                </ul>
                            </li>
                            <!-- Download -->
                            <li id="menu-item-5" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-5">
                                <a href="../../download">Download</a>
                            </li>
                            <!-- Community. External links open in a new tab and carry
                                 rel="noopener" so the opened page gets no window.opener handle. -->
                            <li id="menu-item-130" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-130">
                                <a href="../../community">Community</a>
                                <ul class="sub-menu com-sm">
                                    <li class="dropmenu-head">Get in Touch</li>
                                    <li><a href="../../community" class="mail">Mailing Lists</a></li>
                                    <li class="divider"></li>
                                    <li><a href="../../community/committers">Project Committers</a></li>
                                    <li><a href="../../community/contribute">How to Contribute</a></li>
                                    <li class="divider"></li>
                                    <li class="dropmenu-head">Developer Resources</li>
                                    <li><a href="https://github.com/apache/incubator-spot" target="_blank" rel="noopener" class="github">Github</a></li>
                                    <li><a href="https://issues.apache.org/jira/browse/SPOT/" target="_blank" rel="noopener" class="jira">JIRA Issue Tracker</a></li>
                                    <!-- The "&" in the query string is escaped as &amp; as required inside an attribute value -->
                                    <li><a href="https://cwiki.apache.org/confluence/pages/viewpage.action?spaceKey=SPOT&amp;title=Apache+Spot+%28Incubating%29+Home" target="_blank" rel="noopener" class="">Confluence Site</a></li>
                                    <li class="divider"></li>
                                    <li class="dropmenu-head">Social Media</li>
                                    <li><a href="https://twitter.com/ApacheSpot" target="_blank" rel="noopener" class="twitter-icon">Twitter</a></li>
                                </ul>
                            </li>
                            <!-- Documentation -->
                            <li id="menu-item-106" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-106">
                                <a href="../../doc">Documentation</a>
                            </li>
                            <!-- Project Components -->
                            <li class="menu-item menu-item-has-children">
                                <a href="#">Project Components</a>
                                <ul class="sub-menu">
                                    <li><a href="../../project-components/ingestion">Ingestion</a></li>
                                    <li><a href="../../project-components/machine-learning">Machine Learning</a></li>
                                    <li><a href="../../project-components/suspicious-connects-analysis">Suspicious Connects Analysis</a></li>
                                    <li><a href="../../project-components/visualization">Visualization</a></li>
                                    <li class="under-dev">Under Development</li>
                                    <li><a href="../../project-components/open-data-models">Open Data Models</a></li>
                                </ul>
                            </li>
                            <!-- Blog (marked active on this page) -->
                            <li id="menu-item-13" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-13 active">
                                <a href="../../blog">Blog</a>
                            </li>
                        </ul>
                    </nav>

                </div>

            </header>

            <div id="mobile-nav"></div>

            <div id="content">

                <div id="inner-content" class="wrap cf">

                    <main id="main" class="m-all t-2of3 d-5of7 cf" role="main" itemscope itemprop="mainContentOfPage" itemtype="http://schema.org/Blog">

                        <article id="post-113" class="cf post-113 post type-post status-publish format-standard hentry category-uncategorized" role="article" itemscope itemprop="blogPost" itemtype="http://schema.org/BlogPosting">

                            <header class="article-header entry-header">

                                <h1 class="entry-title single-title" itemprop="headline" rel="bookmark">How Apache Spot (Incubating) Helps Create Well-Stocked Data Lakes and Catch Powerful Insights</h1>

                                <p class="byline entry-meta vcard">

                                    <time class="updated entry-time" datetime="2016-08-08" itemprop="datePublished">
                                        August 8, 2016
                                    </time>
                                </p>

                            </header>
                            <section class="entry-content cf" itemprop="articleBody">
                                <p>
                                    About four years ago, the era of Big Data analytics began. Paired with advanced analytics, massive volumes of data can be culled not only to inform critical decisions, but also to simulate sophisticated “what if” scenarios that allow companies to gain competitive advantages by generating and predicting different scenarios. For example, a financial services company can more accurately determine what other products to offer a customer, and in what order, based on a wide variety of data, then use advanced analytics to gather insights. Creating a data lake that can be effectively used for predictive analytics raises tough questions — what data sources should we use? How should this data be collected and ingested? What are the best algorithms to analyze the data, and how should we present these results to our decision makers?
                                </p>
                                <p>
                                    Apache Spot can help to solve most of these issues. Following is a description of Apache Spot, which is designed to facilitate Big Data analytics scenarios like the financial services company’s question about the right product to offer customers.
                                </p>
                                <a href="../../library/images/ONI_Architecture-Diagram_1300_v4.png"><img src="../../library/images/ONI_Architecture-Diagram_1300_v4.png" alt="Apache Spot architecture diagram" /></a>
                                <h3><strong>Apache Spot Core Components</strong></h3>
                                <p>
                                    The Apache Spot Core is composed of three main components — data integration (collectors), data store (HDFS here, but can also be a non-SQL database) and machine learning.
                                </p>
                                <p>
                                    In this diagram, the top left shows Apache Spot Data Sources, which include the collection of the information that will be used to create a data lake. The process is simple. Define a pull or push from the source of information then capture this information on Apache Spot’s “collectors.” The collectors are processes that interpret the information that is sent, then write it to the HDFS system in the Apache Spot cluster. The HDFS stores the data lake and ensures that resources can grow while remaining economical at every size. The Apache Spot algorithms are part of machine learning and are used to detect the uncommon information in the data lake.
                                </p>
                                <h3><strong>Operational Analytics</strong></h3>
                                <p>
                                    As part of operational analytics, Apache Spot executes different batch processes that add information to machine learning results to provide meaning and context. Using the financial services product example, basic customer data could be augmented with information about other customers in the same region along with information about which products those customers recommended or complained about. Basically, the data scientists can “play” with the data using different algorithms to identify insights.
                                </p>
                                <h3><strong>Visualizing Results</strong></h3>
                                <p>
                                    The Apache Spot GUI displays the results that the machine learning algorithms generate. Results are presented so that it is easy both to identify the most common things and to find the most suspicious or uncommon information that is part of the data lake.
                                </p>
                                <h3><strong>Customizable Open Source</strong></h3>
                                <p>
                                    Because Apache Spot is an open-source project, most of the components depicted here can be modified by the end user.
                                </p>
                            </section>
                            <footer class="article-footer">

                                filed under: <a href="../../category/uncategorized/" rel="category tag">Uncategorized</a>

                            </footer>

                        </article>

                    </main>

					<div id="sidebar1" class="sidebar m-all t-1of3 d-2of7 last-col cf" role="complementary">

						<!-- Widget: links to recent blog posts -->
						<div id="recent-posts-2" class="widget widget_recent_entries">
							<h4 class="widgettitle">Recent Posts</h4>
							<ul>
								<li><a href="../../blog/apache-spot-product-architecture-overview/">Apache Spot Product Architecture Overview</a></li>
								<li><a href="../../blog/strength-in-numbers-why-consider-open-source-cybersecurity-analytics/">Strength in Numbers:  Why Consider Open Source Cybersecurity Analytics</a></li>
								<li><a href="../../blog/jupyter-notebooks-for-data-analysis/">Jupyter Notebooks for Data Analysis</a></li>
								<li><a href="../../blog/apache-spot-and-cybersecurity-using-netflows-to-detect-threats-to-critical-infrastructure/">Apache Spot (Incubating) and Cybersecurity — Using NetFlows to Detect Threats to  Critical Infrastructure</a></li>
								<li><a href="../../blog/how-apache-spot-helps-create-well-stocked-data-lakes-and-catch-powerful-insights/">How Apache Spot (Incubating) Helps Create Well-Stocked Data Lakes and Catch Powerful Insights</a></li>
								<li><a href="../../blog/apache-spot-3-most-asked-questions/">Apache Spot (Incubating): 3 Most-Asked Questions</a></li>
							</ul>
						</div>

						<!-- Widget: monthly archive links, newest first -->
						<div id="archives-2" class="widget widget_archive">
							<h4 class="widgettitle">Archives</h4>
							<ul>
								<li><a href="../../2017/03/">March 2017</a></li>
								<li><a href="../../2016/10/">October 2016</a></li>
								<li><a href="../../2016/09/">September 2016</a></li>
								<li><a href="../../2016/08/">August 2016</a></li>
								<li><a href="../../2016/03/">March 2016</a></li>
							</ul>
						</div>

					</div>

                </div>

            </div>


            <!-- Page footer: contains only the centered copyright notice -->
            <footer class="footer" role="contentinfo" itemscope itemtype="http://schema.org/WPFooter">

                <div id="inner-footer" class="wrap cf">

                    <p class="source-org copyright" style="text-align:center;">
                        &copy; 2019 Apache Spot.
                    </p>

                </div>

            </footer>

        </div>
		<!-- "Top" link pointing at the fragment #0.
		     NOTE(review): its behavior is presumably attached via the cd-top class by a script — confirm. -->
		<a href="#0" class="cd-top">Top</a>
        <!-- Site script, loaded at the end of <body> -->
        <script type='text/javascript' src='../../library/js/scripts.js'></script>

    </body>

</html>
