<!doctype html>

<!--[if lt IE 7]><html lang="en-US" class="no-js lt-ie9 lt-ie8 lt-ie7"><![endif]-->
<!--[if (IE 7)&!(IEMobile)]><html lang="en-US" class="no-js lt-ie9 lt-ie8"><![endif]-->
<!--[if (IE 8)&!(IEMobile)]><html lang="en-US" class="no-js lt-ie9"><![endif]-->
<!--[if gt IE 8]><!-->
<html lang="en-US" class="no-js">
    <!--<![endif]-->

    <head>
        <meta charset="utf-8">

        <meta http-equiv="X-UA-Compatible" content="IE=edge">

        <title>Apache Spot (Incubating) &raquo; How Apache Spot (Incubating) Helps Create Well-Stocked Data Lakes and Catch Powerful Insights</title>

        <meta name="HandheldFriendly" content="True">
        <meta name="MobileOptimized" content="320">
        <meta name="viewport" content="width=device-width, initial-scale=1"/>

        <link rel="apple-touch-icon" href="../wp-content/themes/oni/library/images/apple-touch-icon.png">
        <link rel="icon" href="../wp-content/themes/oni/favicon.png">
        <!--[if IE]>
        <link rel="shortcut icon" href="../wp-content/themes/oni/favicon.ico">
        <![endif]-->
        <meta name="msapplication-TileColor" content="#f01d4f">
        <meta name="msapplication-TileImage" content="../wp-content/themes/oni/library/images/win8-tile-icon.png">
        <meta name="theme-color" content="#121212">

        <link rel="pingback" href="../xmlrpc.php">

        <link rel='dns-prefetch' href='//fonts.googleapis.com' />
        <link rel='dns-prefetch' href='//s.w.org' />
        <link rel="alternate" type="application/rss+xml" title="Apache Spot (Incubating) &raquo; Feed" href="../feed/" />
        <link rel="alternate" type="application/rss+xml" title="Apache Spot (Incubating) &raquo; Comments Feed" href="../comments/feed/" />
        <link rel="alternate" type="application/rss+xml" title="Apache Spot (Incubating) &raquo; How Apache Spot (Incubating) Helps Create Well-Stocked Data Lakes and Catch Powerful Insights Comments Feed" href="../how-open-network-insight-helps-create-well-stocked-data-lakes-and-catch-powerful-insights/feed/" />
        <script type="text/javascript">
			// Emoji settings: image base URLs and extensions, plus the loader script that is
			// injected when the feature tests below report incomplete emoji rendering.
			// The detection code receives this object as its third argument (`c`).
			window._wpemojiSettings = {
				"baseUrl" : "https:\/\/s.w.org\/images\/core\/emoji\/2\/72x72\/",
				"ext" : ".png",
				"svgUrl" : "https:\/\/s.w.org\/images\/core\/emoji\/2\/svg\/",
				"svgExt" : ".svg",
				"source" : {
					// Must be HTTPS: a plain-http script is blocked as mixed content
					// whenever this page itself is served over HTTPS.
					"concatemoji" : "https:\/\/spot.incubator.apache.org\/wp-includes\/js\/wp-emoji-release.min.js"
				}
			}; ! function(a, b, c) {
				// a = window, b = document, c = window._wpemojiSettings (see the call at the bottom).

				/**
				 * Canvas-based feature test. Draws the character sequence associated with
				 * the test name `a` and inspects the canvas output.
				 * Returns false when no 2D canvas context with fillText is available,
				 * or when `a` is not one of the known test names.
				 */
				function d(a) {
					var c,
					    d,
					    e,
					    f,
					    g,
					    h = b.createElement("canvas"),
					    i = h.getContext && h.getContext("2d"),
					    j = String.fromCharCode;
					if (!i || !i.fillText)
						return !1;
					// The switch expression first configures the context, then dispatches on `a`.
					switch(i.textBaseline="top",i.font="600 32px Arial",a) {
					case"flag":
						// Passes only if the first drawing yields a data URL of at least 3000
						// characters AND the two follow-up sequences render differently.
						return i.fillText(j(55356, 56806, 55356, 56826), 0, 0), !(h.toDataURL().length < 3e3) && (i.clearRect(0, 0, h.width, h.height), i.fillText(j(55356, 57331, 65039, 8205, 55356, 57096), 0, 0),
						c = h.toDataURL(), i.clearRect(0, 0, h.width, h.height), i.fillText(j(55356, 57331, 55356, 57096), 0, 0),
						d = h.toDataURL(), c !== d);
					case"diversity":
						// Samples the pixel at (16,16) after each of two drawings; passes when
						// the RGBA values differ between them.
						return i.fillText(j(55356, 57221), 0, 0),
						e = i.getImageData(16, 16, 1, 1).data,
						f = e[0] + "," + e[1] + "," + e[2] + "," + e[3], i.fillText(j(55356, 57221, 55356, 57343), 0, 0),
						e = i.getImageData(16, 16, 1, 1).data,
						g = e[0] + "," + e[1] + "," + e[2] + "," + e[3], f !== g;
					// The remaining tests pass when the red channel of the pixel at (16,16)
					// is non-zero after drawing the sequence.
					case"simple":
						return i.fillText(j(55357, 56835), 0, 0), 0 !== i.getImageData(16,16,1,1).data[0];
					case"unicode8":
						return i.fillText(j(55356, 57135), 0, 0), 0 !== i.getImageData(16,16,1,1).data[0];
					case"unicode9":
						return i.fillText(j(55358, 56631), 0, 0), 0 !== i.getImageData(16,16,1,1).data[0]
					}
					return !1
				}

				/** Appends a <script> element with src `a` to the document's first <head>. */
				function e(a) {
					var c = b.createElement("script");
					c.src = a, c.type = "text/javascript", b.getElementsByTagName("head")[0].appendChild(c)
				}

				var f,
				    g,
				    h,
				    i;
				// Run every feature test, storing each result in c.supports[name] and folding
				// it into the aggregate flags (the "flag" test is excluded from everythingExceptFlag).
				for ( i = Array("simple", "flag", "unicode8", "diversity", "unicode9"), c.supports = {
					everything : !0,
					everythingExceptFlag : !0
				},
				h = 0; h < i.length; h++)
					c.supports[i[h]] = d(i[h]), c.supports.everything = c.supports.everything && c.supports[i[h]], "flag" !== i[h] && (c.supports.everythingExceptFlag = c.supports.everythingExceptFlag && c.supports[i[h]]);
				// everythingExceptFlag ends up true only when all non-flag tests pass and the flag test fails.
				// When any test failed: mark DOM readiness via readyCallback (addEventListener, or the
				// attachEvent fallback when it is unavailable), then inject the loader script(s)
				// named in c.source.
				c.supports.everythingExceptFlag = c.supports.everythingExceptFlag && !c.supports.flag, c.DOMReady = !1, c.readyCallback = function() {
					c.DOMReady = !0
				}, c.supports.everything || ( g = function() {
					c.readyCallback()
				}, b.addEventListener ? (b.addEventListener("DOMContentLoaded", g, !1), a.addEventListener("load", g, !1)) : (a.attachEvent("onload", g), b.attachEvent("onreadystatechange", function() {
					"complete" === b.readyState && c.readyCallback()
				})),
				f = c.source || {}, f.concatemoji ? e(f.concatemoji) : f.wpemoji && f.twemoji && (e(f.twemoji), e(f.wpemoji)))
			}(window, document, window._wpemojiSettings);
        </script>
        <style type="text/css">
			/* Emoji and smiley images flow inline at text size. Every declaration is
			   !important so it takes precedence over non-important image rules. */
			img.wp-smiley,
			img.emoji {
				/* Box size and placement within the line of text */
				display: inline !important;
				height: 1em !important;
				width: 1em !important;
				vertical-align: -0.1em !important;
				margin: 0 .07em !important;
				padding: 0 !important;
				/* No decoration around the image */
				border: none !important;
				box-shadow: none !important;
				background: none !important;
			}
        </style>
        <!-- Loaded over HTTPS so the font stylesheet is not blocked as mixed content on secure pages. -->
        <link rel='stylesheet' id='googleFonts-css'  href='https://fonts.googleapis.com/css?family=Lato%3A400%2C700%2C400italic%2C700italic' type='text/css' media='all' />
        <link rel='stylesheet' id='bones-stylesheet-css'  href='../wp-content/themes/oni/library/css/style.css' type='text/css' media='all' />
        <!--[if lt IE 9]>
        <link rel='stylesheet' id='bones-ie-only-css'  href='../wp-content/themes/oni/library/css/ie.css' type='text/css' media='all' />
        <![endif]-->
        <link rel='stylesheet' id='mm-css-css'  href='../wp-content/themes/oni/library/css/meanmenu.css' type='text/css' media='all' />
        <script type='text/javascript' src='../wp-content/themes/oni/library/js/libs/modernizr.custom.min.js'></script>
        <script type='text/javascript' src='../wp-includes/js/jquery/jquery.js'></script>
        <script type='text/javascript' src='../wp-includes/js/jquery/jquery-migrate.min.js'></script>
        <script type='text/javascript' src='../wp-content/themes/oni/library/js/jquery.meanmenu.js'></script>
        <link rel='https://api.w.org/' href='../wp-json/' />
        <link rel="canonical" href="../how-open-network-insight-helps-create-well-stocked-data-lakes-and-catch-powerful-insights/" />
        <link rel='shortlink' href='../?p=113' />
        <link rel="alternate" type="application/json+oembed" href="../wp-json/oembed/1.0/embed?url=http%3A%2F%2Fnolamarketing.com%2Fclient%2Foni%2Fhow-open-network-insight-helps-create-well-stocked-data-lakes-and-catch-powerful-insights%2F" />
        <link rel="alternate" type="text/xml+oembed" href="../wp-json/oembed/1.0/embed?url=http%3A%2F%2Fnolamarketing.com%2Fclient%2Foni%2Fhow-open-network-insight-helps-create-well-stocked-data-lakes-and-catch-powerful-insights%2F&#038;format=xml" />

        <script>
			// Google Analytics loader.
			// i = window, s = document, o = 'script', g = library URL, r = name of the global
			// command function ('ga'); a and m are scratch variables for the created and the
			// first existing <script> element.
			(function(i, s, o, g, r, a, m) {
				i['GoogleAnalyticsObject'] = r;
				// Until the library loads, window[r] only queues its arguments in window[r].q;
				// window[r].l records the time this snippet ran.
				i[r] = i[r] ||
				function() {
					(i[r].q = i[r].q || []).push(arguments)
				}, i[r].l = 1 * new Date();
				// Insert the library as an async script before the first <script> in the document.
				a = s.createElement(o),
				m = s.getElementsByTagName(o)[0];
				a.async = 1;
				a.src = g;
				m.parentNode.insertBefore(a, m)
				// Explicit https:// instead of a protocol-relative URL: the library is always
				// fetched securely and still resolves when the page is opened from file://.
			})(window, document, 'script', 'https://www.google-analytics.com/analytics.js', 'ga');

			// Queue tracker creation for this property, then record a page view.
			ga('create', 'UA-75955621-1', 'auto');
			ga('send', 'pageview');

        </script>
    </head>

    <body class="single single-post postid-113 single-format-standard" itemscope itemtype="http://schema.org/WebPage">

        <div id="container">

            <header class="header" role="banner" itemscope itemtype="http://schema.org/WPHeader">

                <div id="inner-header" class="wrap cf">

                    <!-- Site logo linking to the project home page. The alt text names the link
                         destination using the project's current name, matching the rest of the page. -->
                    <p id="logo" class="h1" itemscope itemtype="http://schema.org/Organization">
                        <a href="https://spot.incubator.apache.org" rel="nofollow"><img src="../wp-content/themes/oni/library/images/logo.png" alt="Apache Spot (Incubating) home" /></a>
                    </p>

                    <!-- Primary navigation. External links open in a new tab; rel="noopener noreferrer"
                         keeps the opened page from reaching this page through window.opener. -->
                    <nav role="navigation" itemscope itemtype="http://schema.org/SiteNavigationElement">
                        <ul id="menu-main-menu" class="nav top-nav cf">
                            <li id="menu-item-129" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-129">
                                <a target="_blank" rel="noopener noreferrer" href="https://github.com/Open-Network-Insight/open-network-insight">Get Started</a>
                            </li>
                            <li id="menu-item-5" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-5">
                                <a target="_blank" rel="noopener noreferrer" href="https://github.com/Open-Network-Insight/open-network-insight#if-you-want-all-of-the-oni-code-at-once-just-clone-it">GitHub</a>
                            </li>
                            <li id="menu-item-130" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-130">
                                <a target="_blank" rel="noopener noreferrer" href="https://github.com/Open-Network-Insight/open-network-insight#contributing-to-oni">Contribute</a>
                            </li>
                            <li id="menu-item-106" class="menu-item menu-item-type-custom menu-item-object-custom menu-item-106">
                                <a target="_blank" rel="noopener noreferrer" href="https://github.com/Open-Network-Insight/open-network-insight/wiki">Wiki</a>
                            </li>
                            <li id="menu-item-13" class="menu-item menu-item-type-post_type menu-item-object-page current_page_parent menu-item-13">
                                <a href="../blog/">Blog</a>
                            </li>
                        </ul>
                    </nav>

                </div>

            </header>

            <div id="mobile-nav"></div>

            <div id="content">

                <div id="inner-content" class="wrap cf">

                    <main id="main" class="m-all t-2of3 d-5of7 cf" role="main" itemscope itemprop="mainContentOfPage" itemtype="http://schema.org/Blog">

                        <article id="post-113" class="cf post-113 post type-post status-publish format-standard hentry category-uncategorized" role="article" itemscope itemprop="blogPost" itemtype="http://schema.org/BlogPosting">

                            <header class="article-header entry-header">

                                <h1 class="entry-title single-title" itemprop="headline" rel="bookmark">How Apache Spot (Incubating) Helps Create Well-Stocked Data Lakes and Catch Powerful Insights</h1>

                                <!-- Post byline: publication date only. -->
                                <p class="byline entry-meta vcard">

                                    <time class="updated entry-time" datetime="2016-08-08" itemprop="datePublished">
                                        August 8, 2016
                                    </time>
                                </p>

                            </header>
                            <section class="entry-content cf" itemprop="articleBody">
                                <p>
                                    About four years ago, the era of Big Data analytics began. Paired with advanced analytics, massive volumes of data can be culled not only to inform critical decisions, but also to simulate sophisticated “what if” scenarios that allow companies to gain competitive advantages by generating and predicting different scenarios. For example, a financial services company can more accurately determine what other products to offer a customer, and in what order, based on a wide variety of data, then use advanced analytics to gather insights. Creating a data lake that can be effectively used for predictive analytics raises tough questions — what data sources should we use? How should this data be collected and ingested? What are the best algorithms to analyze the data, and how should we present these results to our decision makers?
                                </p>
                                <p>
                                    Apache Spot (Incubating) can help to solve most of these issues. The following is a description of Apache Spot (Incubating), which is designed to facilitate Big Data analytics scenarios like the financial services company’s question about the right product to offer customers.
                                </p>
                                <!-- Architecture diagram discussed in the sections below; links to the full-size image. -->
                                <a href="../wp-content/uploads/2016/09/ONI_Architecture-Diagram_1300_v4.png"><img src="../wp-content/uploads/2016/09/ONI_Architecture-Diagram_1300_v4.png" alt="Apache Spot (Incubating) architecture diagram" width="1300" height="675" class="alignnone size-full wp-image-114" srcset="../wp-content/uploads/2016/09/ONI_Architecture-Diagram_1300_v4.png 1300w, ../wp-content/uploads/2016/09/ONI_Architecture-Diagram_1300_v4-300x156.png 300w, ../wp-content/uploads/2016/09/ONI_Architecture-Diagram_1300_v4-768x399.png 768w, ../wp-content/uploads/2016/09/ONI_Architecture-Diagram_1300_v4-1024x532.png 1024w" sizes="(max-width: 1300px) 100vw, 1300px" /></a>
                                <h3><strong>Apache Spot (Incubating) Core Components</strong></h3>
                                <p>
                                    The Apache Spot (Incubating) Core is composed of three main components — data integration (collectors), data store (HDFS here, but can also be a non-SQL database) and machine learning.
                                </p>
                                <p>
                                    In this diagram, the top left shows Apache Spot (Incubating) Data Sources, which include the collection of the information that will be used to create a data lake. The process is simple. Define a pull or push from the source of information, then capture this information on Apache Spot (Incubating)’s “collectors.” The collectors are processes that interpret the information that is sent, then write it to the HDFS system in the Apache Spot (Incubating) cluster. The HDFS stores the data lake and ensures that resources can grow while remaining economical at every size. The Apache Spot (Incubating) algorithms are part of machine learning and are used to detect the uncommon information in the data lake.
                                </p>
                                <h3><strong>Operational Analytics</strong></h3>
                                <p>
                                    As part of operational analytics, Apache Spot (Incubating) executes different batch processes that add information to machine learning results to provide meaning and context. Using the financial services product example, basic customer data could be augmented with information about other customers in the same region along with information about which products those customers recommended or complained about. Basically, the data scientists can “play” with the data using different algorithms to identify insights.
                                </p>
                                <h3><strong>Visualizing Results</strong></h3>
                                <p>
                                    The Apache Spot (Incubating) GUI displays the results that the machine learning algorithms generate. Results are presented so that it is easy both to identify the most common items and to find the most suspicious or uncommon information in the data lake.
                                </p>
                                <h3><strong>Customizable Open Source</strong></h3>
                                <p>
                                    Because Apache Spot (Incubating) is an open-source project, most of the components depicted here can be modified by the end user.
                                </p>
                            </section>
                            <footer class="article-footer">

                                filed under: <a href="../category/uncategorized/" rel="category tag">Uncategorized</a>

                            </footer>

                        </article>

                    </main>

                    <div id="sidebar1" class="sidebar m-all t-1of3 d-2of7 last-col cf" role="complementary">

                        <div id="recent-posts-2" class="widget widget_recent_entries">
                            <h4 class="widgettitle">Recent Posts</h4>
                            <ul>
                                <li>
                                    <a href="../open-network-insight-oni-and-cybersecurity-using-netflows-to-detect-threats-to-critical-infrastructure/">Apache Spot (Incubating) and Cybersecurity — Using NetFlows to Detect Threats to  Critical Infrastructure</a>
                                </li>
                                <li>
                                    <a href="../how-open-network-insight-helps-create-well-stocked-data-lakes-and-catch-powerful-insights/">How Apache Spot (Incubating) Helps Create Well-Stocked Data Lakes and Catch Powerful Insights</a>
                                </li>
                                <li>
                                    <a href="../open-network-insight-3-most-asked-questions/">Apache Spot (Incubating): Three Most-Asked Questions</a>
                                </li>
                            </ul>
                        </div>
                        <div id="archives-2" class="widget widget_archive">
                            <h4 class="widgettitle">Archives</h4>
                            <ul>
                                <li>
                                    <a href='../2016/08/'>August 2016</a>
                                </li>
                                <li>
                                    <a href='../2016/03/'>March 2016</a>
                                </li>
                            </ul>
                        </div>

                    </div>

                </div>

            </div>

            <footer class="footer" role="contentinfo" itemscope itemtype="http://schema.org/WPFooter">

                <div id="inner-footer" class="wrap cf">

                    <nav role="navigation"></nav>
                    <p class="source-org copyright" style="text-align:center;">
                        &copy; 2020 Apache Spot (Incubating).
                    </p>

                </div>

            </footer>

        </div>

        <script type='text/javascript' src='../wp-includes/js/comment-reply.min.js'></script>
        <script type='text/javascript' src='../wp-content/themes/oni/library/js/scripts.js'></script>
        <script type='text/javascript' src='../wp-includes/js/wp-embed.min.js'></script>

    </body>

</html>
<!-- end of site. what a ride! -->
