Publish commit(s) from site source repo:
  4f7d347 kudu flume sink blog post

Site-Repo-Commit: 4f7d347a0998e89604594cad3810efa562257cbb
diff --git a/2016/02/26/apache-kudu-0-7-0-released.html b/2016/02/26/apache-kudu-0-7-0-released.html
index a8989d4..5ec5f81 100644
--- a/2016/02/26/apache-kudu-0-7-0-released.html
+++ b/2016/02/26/apache-kudu-0-7-0-released.html
@@ -143,6 +143,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -171,8 +173,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/03/10/apache-kudu-0-7-1-released.html b/2016/03/10/apache-kudu-0-7-1-released.html
index e37d93f..71b4a56 100644
--- a/2016/03/10/apache-kudu-0-7-1-released.html
+++ b/2016/03/10/apache-kudu-0-7-1-released.html
@@ -142,6 +142,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -170,8 +172,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/03/20/weekly-update.html b/2016/03/20/weekly-update.html
index 666e097..50395a0 100644
--- a/2016/03/20/weekly-update.html
+++ b/2016/03/20/weekly-update.html
@@ -202,6 +202,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -230,8 +232,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/03/28/weekly-update.html b/2016/03/28/weekly-update.html
index 40e4471..2097242 100644
--- a/2016/03/28/weekly-update.html
+++ b/2016/03/28/weekly-update.html
@@ -193,6 +193,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -221,8 +223,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/04/04/weekly-update.html b/2016/04/04/weekly-update.html
index 4fcd6d1..5aa7710 100644
--- a/2016/04/04/weekly-update.html
+++ b/2016/04/04/weekly-update.html
@@ -212,6 +212,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -240,8 +242,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/04/11/weekly-update.html b/2016/04/11/weekly-update.html
index b92142d..45e977c 100644
--- a/2016/04/11/weekly-update.html
+++ b/2016/04/11/weekly-update.html
@@ -212,6 +212,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -240,8 +242,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/04/12/apache-kudu-0-8-0-released.html b/2016/04/12/apache-kudu-0-8-0-released.html
index 321cd88..da6e6e6 100644
--- a/2016/04/12/apache-kudu-0-8-0-released.html
+++ b/2016/04/12/apache-kudu-0-8-0-released.html
@@ -133,6 +133,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -161,8 +163,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/04/14/ingesting-json-apache-kudu-streamsets-data-collector.html b/2016/04/14/ingesting-json-apache-kudu-streamsets-data-collector.html
index e96d986..928445c 100644
--- a/2016/04/14/ingesting-json-apache-kudu-streamsets-data-collector.html
+++ b/2016/04/14/ingesting-json-apache-kudu-streamsets-data-collector.html
@@ -156,6 +156,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -184,8 +186,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/04/18/weekly-update.html b/2016/04/18/weekly-update.html
index d887649..bfe9c0b 100644
--- a/2016/04/18/weekly-update.html
+++ b/2016/04/18/weekly-update.html
@@ -210,6 +210,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -238,8 +240,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/04/19/kudu-0-8-0-predicate-improvements.html b/2016/04/19/kudu-0-8-0-predicate-improvements.html
index ae16ce6..3d4d373 100644
--- a/2016/04/19/kudu-0-8-0-predicate-improvements.html
+++ b/2016/04/19/kudu-0-8-0-predicate-improvements.html
@@ -190,6 +190,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -218,8 +220,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/04/25/weekly-update.html b/2016/04/25/weekly-update.html
index d21d078..81c5a0c 100644
--- a/2016/04/25/weekly-update.html
+++ b/2016/04/25/weekly-update.html
@@ -209,6 +209,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -237,8 +239,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/04/26/ycsb.html b/2016/04/26/ycsb.html
index c2375fa..9aebfba 100644
--- a/2016/04/26/ycsb.html
+++ b/2016/04/26/ycsb.html
@@ -171,16 +171,22 @@
 
 <p>Also note that the 99th percentile latency seems to alternate between close to zero and a value near 500ms. This bimodal distribution led me to grep in the Java source for the magic number 500. Sure enough, I found:</p>
 
-<pre><code class="language-java">public static final int SLEEP_TIME = 500;
-</code></pre>
+<div class="highlighter-coderay"><div class="CodeRay">
+  <div class="code"><pre><span class="line-numbers"><a href="#n1" name="n1">1</a></span><span style="color:#088;font-weight:bold">public</span> <span style="color:#088;font-weight:bold">static</span> <span style="color:#088;font-weight:bold">final</span> <span style="color:#339;font-weight:bold">int</span> SLEEP_TIME = <span style="color:#00D">500</span>;
+</pre></div>
+</div>
+</div>
 
 <p>Used in this backoff calculation method (slightly paraphrased here):</p>
 
-<pre><code class="language-java">  long getSleepTimeForRpc(KuduRpc&lt;?&gt; rpc) {
-    // TODO backoffs? Sleep in increments of 500 ms, plus some random time up to 50
-    return (attemptCount * SLEEP_TIME) + sleepRandomizer.nextInt(50);
-  }
-</code></pre>
+<div class="highlighter-coderay"><div class="CodeRay">
+  <div class="code"><pre><span class="line-numbers"><a href="#n1" name="n1">1</a></span>  <span style="color:#339;font-weight:bold">long</span> getSleepTimeForRpc(KuduRpc&lt;?&gt; rpc) {
+<span class="line-numbers"><a href="#n2" name="n2">2</a></span>    <span style="color:#777">// TODO backoffs? Sleep in increments of 500 ms, plus some random time up to 50</span>
+<span class="line-numbers"><a href="#n3" name="n3">3</a></span>    <span style="color:#080;font-weight:bold">return</span> (attemptCount * SLEEP_TIME) + sleepRandomizer.nextInt(<span style="color:#00D">50</span>);
+<span class="line-numbers"><a href="#n4" name="n4">4</a></span>  }
+</pre></div>
+</div>
+</div>
 
 <p>One reason that a client will back off and retry is a <code>SERVER_TOO_BUSY</code> response from the server. This response is used in a number of overload situations. In a write-mostly workload, the most likely situation is that the server is low on memory and thus asking clients to back off while it flushes. Sure enough, when we graph the heap usage over time, as well as the rate of writes rejected due to low-memory, we see that this is the case:</p>
 
@@ -409,6 +415,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -437,8 +445,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/05/03/weekly-update.html b/2016/05/03/weekly-update.html
index 912a3b5..0f02c73 100644
--- a/2016/05/03/weekly-update.html
+++ b/2016/05/03/weekly-update.html
@@ -184,6 +184,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -212,8 +214,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/05/09/weekly-update.html b/2016/05/09/weekly-update.html
index 8aecbbf..cdb7805 100644
--- a/2016/05/09/weekly-update.html
+++ b/2016/05/09/weekly-update.html
@@ -174,6 +174,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -202,8 +204,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/05/16/weekly-update.html b/2016/05/16/weekly-update.html
index 5e22000..00b29f6 100644
--- a/2016/05/16/weekly-update.html
+++ b/2016/05/16/weekly-update.html
@@ -209,6 +209,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -237,8 +239,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/05/23/weekly-update.html b/2016/05/23/weekly-update.html
index b9df224..8dddfbc 100644
--- a/2016/05/23/weekly-update.html
+++ b/2016/05/23/weekly-update.html
@@ -198,6 +198,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -226,8 +228,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/06/01/weekly-update.html b/2016/06/01/weekly-update.html
index 1420822..36016d6 100644
--- a/2016/06/01/weekly-update.html
+++ b/2016/06/01/weekly-update.html
@@ -164,6 +164,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -192,8 +194,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/06/02/no-default-partitioning.html b/2016/06/02/no-default-partitioning.html
index e27382a..69d53d5 100644
--- a/2016/06/02/no-default-partitioning.html
+++ b/2016/06/02/no-default-partitioning.html
@@ -205,6 +205,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -233,8 +235,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/06/06/weekly-update.html b/2016/06/06/weekly-update.html
index 9844630..2d9540d 100644
--- a/2016/06/06/weekly-update.html
+++ b/2016/06/06/weekly-update.html
@@ -159,6 +159,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -187,8 +189,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/06/10/apache-kudu-0-9-0-released.html b/2016/06/10/apache-kudu-0-9-0-released.html
index fa4cf02..6da7ab2 100644
--- a/2016/06/10/apache-kudu-0-9-0-released.html
+++ b/2016/06/10/apache-kudu-0-9-0-released.html
@@ -134,6 +134,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -162,8 +164,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/06/13/weekly-update.html b/2016/06/13/weekly-update.html
index 65e74a6..3320c73 100644
--- a/2016/06/13/weekly-update.html
+++ b/2016/06/13/weekly-update.html
@@ -167,6 +167,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -195,8 +197,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/06/17/raft-consensus-single-node.html b/2016/06/17/raft-consensus-single-node.html
index d670411..8f56ca7 100644
--- a/2016/06/17/raft-consensus-single-node.html
+++ b/2016/06/17/raft-consensus-single-node.html
@@ -209,6 +209,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -237,8 +239,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/06/21/weekly-update.html b/2016/06/21/weekly-update.html
index 1e0daf0..b1f5357 100644
--- a/2016/06/21/weekly-update.html
+++ b/2016/06/21/weekly-update.html
@@ -160,6 +160,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -188,8 +190,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/06/24/multi-master-1-0-0.html b/2016/06/24/multi-master-1-0-0.html
index 826bbad..c50fae3 100644
--- a/2016/06/24/multi-master-1-0-0.html
+++ b/2016/06/24/multi-master-1-0-0.html
@@ -222,6 +222,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -250,8 +252,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/06/27/weekly-update.html b/2016/06/27/weekly-update.html
index c0339dd..11bf8a9 100644
--- a/2016/06/27/weekly-update.html
+++ b/2016/06/27/weekly-update.html
@@ -224,6 +224,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -252,8 +254,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/07/01/apache-kudu-0-9-1-released.html b/2016/07/01/apache-kudu-0-9-1-released.html
index e51e2da..7b5dc2b 100644
--- a/2016/07/01/apache-kudu-0-9-1-released.html
+++ b/2016/07/01/apache-kudu-0-9-1-released.html
@@ -132,6 +132,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -160,8 +162,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/07/11/weekly-update.html b/2016/07/11/weekly-update.html
index 7295f80..dc41ee6 100644
--- a/2016/07/11/weekly-update.html
+++ b/2016/07/11/weekly-update.html
@@ -190,6 +190,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -218,8 +220,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/07/18/weekly-update.html b/2016/07/18/weekly-update.html
index 6210103..8a64e18 100644
--- a/2016/07/18/weekly-update.html
+++ b/2016/07/18/weekly-update.html
@@ -182,6 +182,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -210,8 +212,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/07/25/asf-graduation.html b/2016/07/25/asf-graduation.html
index fdac1c7..97006d8 100644
--- a/2016/07/25/asf-graduation.html
+++ b/2016/07/25/asf-graduation.html
@@ -158,6 +158,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -186,8 +188,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/07/26/weekly-update.html b/2016/07/26/weekly-update.html
index 73c4172..640119a9 100644
--- a/2016/07/26/weekly-update.html
+++ b/2016/07/26/weekly-update.html
@@ -190,6 +190,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -218,8 +220,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/08/08/weekly-update.html b/2016/08/08/weekly-update.html
index c49fa8f..20cebfa 100644
--- a/2016/08/08/weekly-update.html
+++ b/2016/08/08/weekly-update.html
@@ -189,6 +189,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -217,8 +219,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/08/16/weekly-update.html b/2016/08/16/weekly-update.html
index 44ce091..33734a7 100644
--- a/2016/08/16/weekly-update.html
+++ b/2016/08/16/weekly-update.html
@@ -239,6 +239,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -267,8 +269,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/08/23/apache-kudu-0-10-0-released.html b/2016/08/23/apache-kudu-0-10-0-released.html
index c9122ea..2f67749 100644
--- a/2016/08/23/apache-kudu-0-10-0-released.html
+++ b/2016/08/23/apache-kudu-0-10-0-released.html
@@ -155,6 +155,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -183,8 +185,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/08/23/new-range-partitioning-features.html b/2016/08/23/new-range-partitioning-features.html
index 1667345..70bd429 100644
--- a/2016/08/23/new-range-partitioning-features.html
+++ b/2016/08/23/new-range-partitioning-features.html
@@ -215,6 +215,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -243,8 +245,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/2016/08/31/intro-flume-kudu-sink.html b/2016/08/31/intro-flume-kudu-sink.html
new file mode 100644
index 0000000..95c3e01
--- /dev/null
+++ b/2016/08/31/intro-flume-kudu-sink.html
@@ -0,0 +1,491 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <meta http-equiv="X-UA-Compatible" content="IE=edge" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
+    <meta name="description" content="A new open source Apache Hadoop ecosystem project, Apache Kudu completes Hadoop's storage layer to enable fast analytics on fast data" />
+    <meta name="author" content="Cloudera" />
+    <title>Apache Kudu - An Introduction to Kudu Flume Sink</title>
+    <!-- Bootstrap core CSS -->
+    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css"
+          integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7"
+          crossorigin="anonymous">
+
+    <!-- Custom styles for this template -->
+    <link href="/css/kudu.css" rel="stylesheet"/>
+    <link href="/css/asciidoc.css" rel="stylesheet"/>
+    <link rel="shortcut icon" href="/img/logo-favicon.ico" />
+    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.6.1/css/font-awesome.min.css" />
+
+    
+    <link rel="alternate" type="application/atom+xml"
+      title="RSS Feed for Apache Kudu blog"
+      href="/feed.xml" />
+    
+
+    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
+    <!--[if lt IE 9]>
+        <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
+        <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
+        <![endif]-->
+  </head>
+  <body>
+    <div class="kudu-site container-fluid">
+      <!-- Static navbar -->
+        <nav class="navbar navbar-default">
+          <div class="container-fluid">
+            <div class="navbar-header">
+              <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
+                <span class="sr-only">Toggle navigation</span>
+                <span class="icon-bar"></span>
+                <span class="icon-bar"></span>
+                <span class="icon-bar"></span>
+              </button>
+              
+              <a class="logo" href="/"><img
+                src="//d3dr9sfxru4sde.cloudfront.net/i/k/apachekudu_logo_0716_80px.png"
+                srcset="//d3dr9sfxru4sde.cloudfront.net/i/k/apachekudu_logo_0716_80px.png 1x, //d3dr9sfxru4sde.cloudfront.net/i/k/apachekudu_logo_0716_160px.png 2x"
+                alt="Apache Kudu"/></a>
+              
+            </div>
+            <div id="navbar" class="collapse navbar-collapse">
+              <ul class="nav navbar-nav navbar-right">
+                <li >
+                  <a href="/">Home</a>
+                </li>
+                <li >
+                  <a href="/overview.html">Overview</a>
+                </li>
+                <li >
+                  <a href="/docs/">Documentation</a>
+                </li>
+                <li >
+                  <a href="/releases/">Download</a>
+                </li>
+                <li class="active">
+                  <a href="/blog/">Blog</a>
+                </li>
+                <!-- NOTE: this dropdown menu does not appear on Mobile, so don't add anything here
+                     that doesn't also appear elsewhere on the site. -->
+                <li class="dropdown">
+                  <a href="/community.html" role="button" aria-haspopup="true" aria-expanded="false">Community <span class="caret"></span></a>
+                  <ul class="dropdown-menu">
+                    <li class="dropdown-header">GET IN TOUCH</li>
+                    <li><a class="icon email" href="/community.html">Mailing Lists</a></li>
+                    <li><a class="icon slack" href="https://getkudu-slack.herokuapp.com/">Slack Channel</a></li>
+                    <li role="separator" class="divider"></li>
+                    <li><a href="/community.html#meetups-user-groups-and-conference-presentations">Events and Meetups</a></li>
+                    <li><a href="/committers.html">Project Committers</a></li>
+                    <!--<li><a href="/roadmap.html">Roadmap</a></li>-->
+                    <li><a href="/community.html#contributions">How to Contribute</a></li>
+                    <li role="separator" class="divider"></li>
+                    <li class="dropdown-header">DEVELOPER RESOURCES</li>
+                    <li><a class="icon github" href="https://github.com/apache/incubator-kudu">GitHub</a></li>
+                    <li><a class="icon gerrit" href="http://gerrit.cloudera.org:8080/#/q/status:open+project:kudu">Gerrit Code Review</a></li>
+                    <li><a class="icon jira" href="https://issues.apache.org/jira/browse/KUDU">JIRA Issue Tracker</a></li>
+                    <li role="separator" class="divider"></li>
+                    <li class="dropdown-header">SOCIAL MEDIA</li>
+                    <li><a class="icon twitter" href="https://twitter.com/ApacheKudu">Twitter</a></li>
+                  </ul>
+                </li>
+                <li >
+                  <a href="/faq.html">FAQ</a>
+                </li>
+              </ul><!-- /.nav -->
+            </div><!-- /#navbar -->
+          </div><!-- /.container-fluid -->
+        </nav>
+
+<div class="row header">
+  <div class="col-lg-12">
+    <h2><a href="/blog">Apache Kudu Blog</a></h2>
+  </div>
+</div>
+
+<div class="row-fluid">
+  <div class="col-lg-9">
+    <article>
+  <header>
+    <h1 class="entry-title">An Introduction to Kudu Flume Sink</h1>
+    <p class="meta">Posted 31 Aug 2016 by Ara Abrahamian</p>
+  </header>
+  <div class="entry-content">
+    <p>This post discusses the Kudu Flume Sink. First, I’ll give some background on why we considered
+using Kudu, what Flume does for us, and how Flume fits with Kudu in our project.</p>
+
+<h1 id="why-kudu">Why Kudu</h1>
+<p>Traditionally in the Hadoop ecosystem we’ve dealt with various <em>batch processing</em> technologies such
+as MapReduce and the many libraries and tools built on top of it in various languages (Apache Pig,
+Apache Hive, Apache Oozie and many others). The main problem with this approach is that it needs to
+process the whole data set in batches, again and again, as soon as new data gets added. Things get
+really complicated when a few such tasks need to get chained together, or when the same data set
+needs to be processed in various ways by different jobs, while all compete for the shared cluster
+resources.</p>
+
+<p>The opposite of this approach is <em>stream processing</em>: process the data as soon as it arrives, not
+in batches. Streaming systems such as Spark Streaming, Storm, Kafka Streams, and many others make
+this possible. But writing streaming services is not trivial. The streaming systems are becoming
+more and more capable and support more complex constructs, but they are not yet easy to use. All
+queries and processes need to be carefully planned and implemented.</p>
+
+<p>To summarize, <em>batch processing</em> is:</p>
+
+<ul>
+  <li>file-based</li>
+  <li>a paradigm that processes large chunks of data as a group</li>
+  <li>high latency and high throughput, both for ingest and query</li>
+  <li>typically easy to program, but hard to orchestrate</li>
+  <li>well suited for writing ad-hoc queries, although they are typically high latency</li>
+</ul>
+
+<p>While <em>stream processing</em> is:</p>
+
+<ul>
+  <li>a totally different paradigm, which involves single events and time windows instead of large groups of events</li>
+  <li>still file-based and not a long-term database</li>
+  <li>not batch-oriented, but incremental</li>
+  <li>ultra-fast ingest and ultra-fast query (query results basically pre-calculated)</li>
+  <li>not so easy to program, relatively easy to orchestrate</li>
+  <li>impossible to write ad-hoc queries</li>
+</ul>
+
+<p>And a Kudu-based <em>near real-time</em> approach is:</p>
+
+<ul>
+  <li>flexible and expressive, thanks to SQL support via Apache Impala (incubating)</li>
+  <li>a table-oriented, mutable data store that feels like a traditional relational database</li>
+  <li>very easy to program; you can even pretend it’s good old MySQL</li>
+  <li>low-latency and relatively high throughput, both for ingest and query</li>
+</ul>
+
+<p>At Argyle Data, we’re dealing with complex fraud detection scenarios. We need to ingest massive
+amounts of data, run machine learning algorithms and generate reports. When we created our current
+architecture two years ago we decided to opt for a database as the backbone of our system. That
+database is Apache Accumulo. It’s a key-value based database which runs on top of Hadoop HDFS,
+quite similar to HBase but with some important improvements such as cell level security and ease
+of deployment and management. To enable querying of this data for quite complex reporting and
+analytics, we used Presto, a distributed query engine with a pluggable architecture open-sourced
+by Facebook. We wrote a connector for it to let it run queries against the Accumulo database. This
+architecture has served us well, but there were a few problems:</p>
+
+<ul>
+  <li>we need to ingest even more massive volumes of data in real-time</li>
+  <li>we need to perform complex machine-learning calculations on even larger data-sets</li>
+  <li>we need to support ad-hoc queries, plus long-term data warehouse functionality</li>
+</ul>
+
+<p>So, we’ve started gradually moving the core machine-learning pipeline to a streaming-based
+solution. This way we can ingest and process larger data-sets faster, in real time. But then how
+would we take care of ad-hoc queries and long-term persistence? This is where Kudu comes in. While
+the machine learning pipeline ingests and processes real-time data, we store a copy of the same
+ingested data in Kudu for long-term access and ad-hoc queries. Kudu is our <em>data warehouse</em>. By
+using Kudu and Impala, we can retire our in-house Presto connector and rely on Impala’s
+super-fast query engine.</p>
+
+<p>But how would we make sure data is reliably ingested into the streaming pipeline <em>and</em> the
+Kudu-based data warehouse? This is where Apache Flume comes in.</p>
+
+<h1 id="why-flume">Why Flume</h1>
+<p>According to their <a href="http://flume.apache.org/">website</a> “Flume is a distributed, reliable, and
+available service for efficiently collecting, aggregating, and moving large amounts of log data.
+It has a simple and flexible architecture based on streaming data flows. It is robust and fault
+tolerant with tunable reliability mechanisms and many failover and recovery mechanisms.” As you
+can see, nowhere is Hadoop mentioned, but Flume is typically used for ingesting data into Hadoop
+clusters.</p>
+
+<p><img src="https://blogs.apache.org/flume/mediaresource/ab0d50f6-a960-42cc-971e-3da38ba3adad" alt="png" /></p>
+
+<p>Flume has an extensible architecture. An instance of Flume, called an <em>agent</em>, can have multiple
+<em>channels</em>, with each having multiple <em>sources</em> and <em>sinks</em> of various types. Sources queue data
+in channels, which in turn write out data to sinks. Such <em>pipelines</em> can be chained together to
+create even more complex ones. There may be more than one agent and agents can be configured to
+support failover and recovery.</p>
+
+<p>Flume comes with a bunch of built-in types of channels, sources and sinks. Memory channel is the
+default (an in-memory queue with no persistence to disk), but other options such as Kafka- and
+File-based channels are also provided. As for sources, Avro, JMS, Thrift, and spooling directory
+sources are some of the built-in ones. Flume also ships with many sinks, including sinks for writing
+data to HDFS, HBase, Hive, Kafka, as well as to other Flume agents.</p>
+
+<p>In the rest of this post I’ll go over the Kudu Flume sink and show you how to configure Flume to
+write ingested data to a Kudu table. The sink has been part of the Kudu distribution since the 0.8
+release and the source code can be found <a href="https://github.com/apache/kudu/tree/master/java/kudu-flume-sink">here</a>.</p>
+
+<h1 id="configuring-the-kudu-flume-sink">Configuring the Kudu Flume Sink</h1>
+<p>Here is a sample Flume configuration file:</p>
+
+<pre><code>agent1.sources  = source1
+agent1.channels = channel1
+agent1.sinks = sink1
+
+agent1.sources.source1.type = exec
+agent1.sources.source1.command = /usr/bin/vmstat 1
+agent1.sources.source1.channels = channel1
+
+agent1.channels.channel1.type = memory
+agent1.channels.channel1.capacity = 10000
+agent1.channels.channel1.transactionCapacity = 1000
+
+agent1.sinks.sink1.type = org.apache.flume.sink.kudu.KuduSink
+agent1.sinks.sink1.masterAddresses = localhost
+agent1.sinks.sink1.tableName = stats
+agent1.sinks.sink1.channel = channel1
+agent1.sinks.sink1.batchSize = 50
+agent1.sinks.sink1.producer = org.apache.kudu.flume.sink.SimpleKuduEventProducer
+</code></pre>
+
+<p>We define a source called <code>source1</code> which simply executes a <code>vmstat</code> command to continuously generate
+virtual memory statistics for the machine and queues events into an in-memory <code>channel1</code> channel,
+which in turn is used for writing these events to a Kudu table called <code>stats</code>. We are using
+<code>org.apache.kudu.flume.sink.SimpleKuduEventProducer</code> as the producer. <code>SimpleKuduEventProducer</code> is
+the built-in and default producer, but it’s implemented as a showcase for how to write Flume
+events into Kudu tables. For any serious functionality we’d have to write a custom producer. We
+need to make this producer and the <code>KuduSink</code> class available to Flume. We can do that by simply
+copying the <code>kudu-flume-sink-&lt;VERSION&gt;.jar</code> jar file from the Kudu distribution to the
+<code>$FLUME_HOME/plugins.d/kudu-sink/lib</code> directory in the Flume installation. The jar file contains
+<code>KuduSink</code> and all of its dependencies (including the Kudu Java client classes).</p>
+
+<p>At a minimum, the Kudu Flume Sink needs to know where the Kudu masters are
+(<code>agent1.sinks.sink1.masterAddresses = localhost</code>) and which Kudu table should be used for writing
+Flume events to (<code>agent1.sinks.sink1.tableName = stats</code>). The Kudu Flume Sink doesn’t create this
+table; it has to be created before the Kudu Flume Sink is started.</p>
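+<p>For example, a table matching <code>SimpleKuduEventProducer</code> (a single <code>payload</code> column) could be created
+ahead of time with the Kudu Java client. This is only an illustrative sketch: the schema, the hash bucket count,
+and the <code>org.apache.kudu.client</code> package names are assumptions and may differ depending on your Kudu version.</p>
+
+<pre><code class="language-java">import java.util.Arrays;
+
+import org.apache.kudu.ColumnSchema;
+import org.apache.kudu.Schema;
+import org.apache.kudu.Type;
+import org.apache.kudu.client.CreateTableOptions;
+import org.apache.kudu.client.KuduClient;
+
+public class CreateStatsTable {
+  public static void main(String[] args) throws Exception {
+    // Assumed master address and table name match the Flume configuration above.
+    KuduClient client = new KuduClient.KuduClientBuilder("localhost").build();
+    try {
+      // A single BINARY primary key column named "payload", as written by the producer.
+      Schema schema = new Schema(Arrays.asList(
+          new ColumnSchema.ColumnSchemaBuilder("payload", Type.BINARY).key(true).build()));
+      // Some partitioning must be specified; four hash buckets is an arbitrary choice.
+      CreateTableOptions options = new CreateTableOptions()
+          .addHashPartitions(Arrays.asList("payload"), 4);
+      client.createTable("stats", schema, options);
+    } finally {
+      client.close();
+    }
+  }
+}
+</code></pre>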
+
+<p>You may also notice the <code>batchSize</code> parameter. Batch size is used for batching up to that many
+Flume events and flushing the entire batch in one shot. Tuning <code>batchSize</code> properly can have a huge
+impact on the ingest performance of the Kudu cluster.</p>
+
+<p>Here is a complete list of KuduSink parameters:</p>
+
+<table>
+  <thead>
+    <tr>
+      <th>Parameter Name</th>
+      <th>Default</th>
+      <th>Description</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>masterAddresses</td>
+      <td>N/A</td>
+      <td>Comma-separated list of “host:port” pairs of the masters (port optional)</td>
+    </tr>
+    <tr>
+      <td>tableName</td>
+      <td>N/A</td>
+      <td>The name of the table in Kudu to write to</td>
+    </tr>
+    <tr>
+      <td>producer</td>
+      <td>org.apache.kudu.flume.sink.SimpleKuduEventProducer</td>
+      <td>The fully qualified class name of the Kudu event producer the sink should use</td>
+    </tr>
+    <tr>
+      <td>batchSize</td>
+      <td>100</td>
+      <td>Maximum number of events the sink should take from the channel per transaction, if available</td>
+    </tr>
+    <tr>
+      <td>timeoutMillis</td>
+      <td>30000</td>
+      <td>Timeout period for Kudu operations, in milliseconds</td>
+    </tr>
+    <tr>
+      <td>ignoreDuplicateRows</td>
+      <td>true</td>
+      <td>Whether to ignore errors indicating that we attempted to insert duplicate rows into Kudu</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>Let’s take a look at the source code for the built-in producer class:</p>
+
+<pre><code class="language-java">public class SimpleKuduEventProducer implements KuduEventProducer {
+  private byte[] payload;
+  private KuduTable table;
+  private String payloadColumn;
+
+  public SimpleKuduEventProducer(){
+  }
+
+  @Override
+  public void configure(Context context) {
+    payloadColumn = context.getString("payloadColumn","payload");
+  }
+
+  @Override
+  public void configure(ComponentConfiguration conf) {
+  }
+
+  @Override
+  public void initialize(Event event, KuduTable table) {
+    this.payload = event.getBody();
+    this.table = table;
+  }
+
+  @Override
+  public List&lt;Operation&gt; getOperations() throws FlumeException {
+    try {
+      Insert insert = table.newInsert();
+      PartialRow row = insert.getRow();
+      row.addBinary(payloadColumn, payload);
+
+      return Collections.singletonList((Operation) insert);
+    } catch (Exception e){
+      throw new FlumeException("Failed to create Kudu Insert object!", e);
+    }
+  }
+
+  @Override
+  public void close() {
+  }
+}
+</code></pre>
+
+<p><code>SimpleKuduEventProducer</code> implements the <code>org.apache.kudu.flume.sink.KuduEventProducer</code> interface,
+which itself looks like this:</p>
+
+<pre><code class="language-java">public interface KuduEventProducer extends Configurable, ConfigurableComponent {
+  /**
+   * Initialize the event producer.
+   * @param event to be written to Kudu
+   * @param table the KuduTable object used for creating Kudu Operation objects
+   */
+  void initialize(Event event, KuduTable table);
+
+  /**
+   * Get the operations that should be written out to Kudu as a result of this
+   * event. This list is written to Kudu using the Kudu client API.
+   * @return List of {@link org.kududb.client.Operation} which
+   * are written as such to Kudu
+   */
+  List&lt;Operation&gt; getOperations();
+
+  /*
+   * Clean up any state. This will be called when the sink is being stopped.
+   */
+  void close();
+}
+</code></pre>
+
+<p><code>public void configure(Context context)</code> is called when an instance of our producer is instantiated
+by the KuduSink. SimpleKuduEventProducer’s implementation looks for a producer parameter named
+<code>payloadColumn</code> and uses its value (“payload” if not overridden in the Flume configuration file) as the
+column which will hold the value of the Flume event payload. If you recall from above, we had
+configured the KuduSink to listen for events generated from the <code>vmstat</code> command. Each output row
+from that command will be stored as a new row containing a <code>payload</code> column in the <code>stats</code> table.
+<code>SimpleKuduEventProducer</code> does not have any configuration parameters beyond <code>payloadColumn</code>, but if it
+had any others we would define them by prefixing them with <code>producer.</code> (<code>agent1.sinks.sink1.producer.parameter1</code>
+for example); the <code>payloadColumn</code> parameter itself is set the same way, as shown below.</p>
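+<p>For instance, to store the event payload in a column named <code>message</code> instead (assuming such a column
+exists in the target table), the Flume configuration would carry a producer-prefixed property like this:</p>
+
+<pre><code>agent1.sinks.sink1.producer = org.apache.kudu.flume.sink.SimpleKuduEventProducer
+agent1.sinks.sink1.producer.payloadColumn = message
+</code></pre>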
+
+<p>The main producer logic resides in the <code>public List&lt;Operation&gt; getOperations()</code> method. In
+SimpleKuduEventProducer’s implementation we simply insert the binary body of the Flume event into
+the Kudu table. Here we call Kudu’s <code>newInsert()</code> to initiate an insert, but we could have used
+<code>Upsert</code> if updating an existing row was also an option; in fact, there’s another producer
+implementation available for doing just that: <code>SimpleKeyedKuduEventProducer</code>. Most probably you
+will need to write your own custom producer in the real world, but you can base your implementation
+on the built-in ones, as in the sketch below.</p>
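+<p>As a rough, hypothetical sketch (not part of the Kudu distribution), a custom producer could parse each
+<code>vmstat</code> line into typed columns and upsert it into a table keyed on a timestamp. The column names, the
+target table schema, and the header-line handling here are all assumptions made for illustration, and the
+package names follow current Kudu releases:</p>
+
+<pre><code class="language-java">import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.FlumeException;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.kudu.client.KuduTable;
+import org.apache.kudu.client.Operation;
+import org.apache.kudu.client.PartialRow;
+import org.apache.kudu.client.Upsert;
+import org.apache.kudu.flume.sink.KuduEventProducer;
+
+public class VmstatKuduEventProducer implements KuduEventProducer {
+  private Event event;
+  private KuduTable table;
+
+  @Override
+  public void configure(Context context) {
+  }
+
+  @Override
+  public void configure(ComponentConfiguration conf) {
+  }
+
+  @Override
+  public void initialize(Event event, KuduTable table) {
+    this.event = event;
+    this.table = table;
+  }
+
+  @Override
+  public List&lt;Operation&gt; getOperations() throws FlumeException {
+    try {
+      String line = new String(event.getBody(), StandardCharsets.UTF_8).trim();
+      String[] fields = line.split("\\s+");
+      // Skip vmstat header lines, which do not start with a number.
+      if (fields.length == 0 || !Character.isDigit(fields[0].charAt(0))) {
+        return Collections.emptyList();
+      }
+      Upsert upsert = table.newUpsert();
+      PartialRow row = upsert.getRow();
+      row.addLong("ts", System.currentTimeMillis());        // assumed primary key column
+      row.addInt("procs_r", Integer.parseInt(fields[0]));   // runnable processes
+      row.addInt("free_mem", Integer.parseInt(fields[3]));  // free memory
+      return Collections.singletonList((Operation) upsert);
+    } catch (Exception e) {
+      throw new FlumeException("Failed to parse vmstat line into a Kudu Upsert!", e);
+    }
+  }
+
+  @Override
+  public void close() {
+  }
+}
+</code></pre>
+
+<p>Such a producer would then be referenced from the Flume configuration via
+<code>agent1.sinks.sink1.producer</code>, exactly like the built-in one.</p>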
+
+<p>In the future, we plan to add more flexible event producer implementations so that creation of a
+custom event producer is not required to write data to Kudu. See
+<a href="https://gerrit.cloudera.org/#/c/4034/">here</a> for a work-in-progress generic event producer for
+Avro-encoded Events.</p>
+
+<h1 id="conclusion">Conclusion</h1>
+<p>Kudu is a scalable data store which lets us ingest insane amounts of data per second. Apache Flume
+helps us aggregate data from various sources, and the Kudu Flume Sink lets us easily store
+the aggregated Flume events into Kudu. Together they enable us to create a data warehouse out of
+disparate sources.</p>
+
+<p><em>Ara Abrahamian is a software engineer at Argyle Data building fraud detection systems using
+sophisticated machine learning methods. Ara is the original author of the Flume Kudu Sink that
+is included in the Kudu distribution. You can follow him on Twitter at @ara_e.</em></p>
+
+  </div>
+</article>
+
+
+  </div>
+  <div class="col-lg-3 recent-posts">
+    <h3>Recent posts</h3>
+    <ul>
+    
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
+      <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
+    
+      <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
+    
+      <li> <a href="/2016/08/16/weekly-update.html">Apache Kudu Weekly Update August 16th, 2016</a> </li>
+    
+      <li> <a href="/2016/08/08/weekly-update.html">Apache Kudu Weekly Update August 8th, 2016</a> </li>
+    
+      <li> <a href="/2016/07/26/weekly-update.html">Apache Kudu Weekly Update July 26, 2016</a> </li>
+    
+      <li> <a href="/2016/07/25/asf-graduation.html">The Apache Software Foundation Announces Apache&reg; Kudu&trade; as a Top-Level Project</a> </li>
+    
+      <li> <a href="/2016/07/18/weekly-update.html">Apache Kudu (incubating) Weekly Update July 18, 2016</a> </li>
+    
+      <li> <a href="/2016/07/11/weekly-update.html">Apache Kudu (incubating) Weekly Update July 11, 2016</a> </li>
+    
+      <li> <a href="/2016/07/01/apache-kudu-0-9-1-released.html">Apache Kudu (incubating) 0.9.1 released</a> </li>
+    
+      <li> <a href="/2016/06/27/weekly-update.html">Apache Kudu (incubating) Weekly Update June 27, 2016</a> </li>
+    
+      <li> <a href="/2016/06/24/multi-master-1-0-0.html">Master fault tolerance in Kudu 1.0</a> </li>
+    
+      <li> <a href="/2016/06/21/weekly-update.html">Apache Kudu (incubating) Weekly Update June 21, 2016</a> </li>
+    
+      <li> <a href="/2016/06/17/raft-consensus-single-node.html">Using Raft Consensus on a Single Node</a> </li>
+    
+      <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
+    
+    </ul>
+  </div>
+</div>
+
+      <footer class="footer">
+        <p class="small">
+        Copyright &copy; 2016 The Apache Software Foundation. 
+        </p>
+      </footer>
+    </div>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
+    <script>
+      // Try to detect touch-screen devices. Note: Many laptops have touch screens.
+      $(document).ready(function() {
+        if ("ontouchstart" in document.documentElement) {
+          $(document.documentElement).addClass("touch");
+        } else {
+          $(document.documentElement).addClass("no-touch");
+        }
+      });
+    </script>
+    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"
+            integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS"
+            crossorigin="anonymous"></script>
+    <script>
+      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-68448017-1', 'auto');
+      ga('send', 'pageview');
+    </script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/anchor-js/3.1.0/anchor.js"></script>
+    <script>
+      anchors.options = {
+        placement: 'right',
+        visible: 'touch',
+      };
+      anchors.add();
+    </script>
+  </body>
+</html>
+
diff --git a/blog/index.html b/blog/index.html
index 406b3f2..44af7e6 100644
--- a/blog/index.html
+++ b/blog/index.html
@@ -111,6 +111,318 @@
 <!-- Articles -->
 <article>
   <header>
+    <h1 class="entry-title"><a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a></h1>
+    <p class="meta">Posted 31 Aug 2016 by Ara Abrahamian</p>
+  </header>
+  <div class="entry-content">
+    
+    <p>This post discusses the Kudu Flume Sink. First, I’ll give some background on why we considered
+using Kudu, what Flume does for us, and how Flume fits with Kudu in our project.</p>
+
+<h1 id="why-kudu">Why Kudu</h1>
+<p>Traditionally in the Hadoop ecosystem we’ve dealt with various <em>batch processing</em> technologies such
+as MapReduce and the many libraries and tools built on top of it in various languages (Apache Pig,
+Apache Hive, Apache Oozie and many others). The main problem with this approach is that it needs to
+process the whole data set in batches, again and again, as soon as new data gets added. Things get
+really complicated when a few such tasks need to get chained together, or when the same data set
+needs to be processed in various ways by different jobs, while all compete for the shared cluster
+resources.</p>
+
+<p>The opposite of this approach is <em>stream processing</em>: process the data as soon as it arrives, not
+in batches. Streaming systems such as Spark Streaming, Storm, Kafka Streams, and many others make
+this possible. But writing streaming services is not trivial. The streaming systems are becoming
+more and more capable and support more complex constructs, but they are not yet easy to use. All
+queries and processes need to be carefully planned and implemented.</p>
+
+<p>To summarize, <em>batch processing</em> is:</p>
+
+<ul>
+  <li>file-based</li>
+  <li>a paradigm that processes large chunks of data as a group</li>
+  <li>high latency and high throughput, both for ingest and query</li>
+  <li>typically easy to program, but hard to orchestrate</li>
+  <li>well suited for writing ad-hoc queries, although they are typically high latency</li>
+</ul>
+
+<p>While <em>stream processing</em> is:</p>
+
+<ul>
+  <li>a totally different paradigm, which involves single events and time windows instead of large groups of events</li>
+  <li>still file-based and not a long-term database</li>
+  <li>not batch-oriented, but incremental</li>
+  <li>ultra-fast ingest and ultra-fast query (query results basically pre-calculated)</li>
+  <li>not so easy to program, relatively easy to orchestrate</li>
+  <li>impossible to write ad-hoc queries</li>
+</ul>
+
+<p>And a Kudu-based <em>near real-time</em> approach is:</p>
+
+<ul>
+  <li>flexible and expressive, thanks to SQL support via Apache Impala (incubating)</li>
+  <li>a table-oriented, mutable data store that feels like a traditional relational database</li>
+  <li>very easy to program, you can even pretend it’s good old MySQL</li>
+  <li>low-latency and relatively high throughput, both for ingest and query</li>
+</ul>
+
+<p>At Argyle Data, we’re dealing with complex fraud detection scenarios. We need to ingest massive
+amounts of data, run machine learning algorithms and generate reports. When we created our current
+architecture two years ago we decided to opt for a database as the backbone of our system. That
+database is Apache Accumulo. It’s a key-value based database which runs on top of Hadoop HDFS,
+quite similar to HBase but with some important improvements such as cell level security and ease
+of deployment and management. To enable querying of this data for quite complex reporting and
+analytics, we used Presto, a distributed query engine with a pluggable architecture open-sourced
+by Facebook. We wrote a connector for it to let it run queries against the Accumulo database. This
+architecture has served us well, but there were a few problems:</p>
+
+<ul>
+  <li>we need to ingest even more massive volumes of data in real-time</li>
+  <li>we need to perform complex machine-learning calculations on even larger data-sets</li>
+  <li>we need to support ad-hoc queries, plus long-term data warehouse functionality</li>
+</ul>
+
+<p>So, we’ve started gradually moving the core machine-learning pipeline to a streaming-based
+solution. This way we can ingest and process larger data-sets faster, in real time. But then how
+would we take care of ad-hoc queries and long-term persistence? This is where Kudu comes in. While
+the machine learning pipeline ingests and processes real-time data, we store a copy of the same
+ingested data in Kudu for long-term access and ad-hoc queries. Kudu is our <em>data warehouse</em>. By
+using Kudu and Impala, we can retire our in-house Presto connector and rely on Impala’s
+super-fast query engine.</p>
+
+<p>But how would we make sure data is reliably ingested into the streaming pipeline <em>and</em> the
+Kudu-based data warehouse? This is where Apache Flume comes in.</p>
+
+<h1 id="why-flume">Why Flume</h1>
+<p>According to their <a href="http://flume.apache.org/">website</a> “Flume is a distributed, reliable, and
+available service for efficiently collecting, aggregating, and moving large amounts of log data.
+It has a simple and flexible architecture based on streaming data flows. It is robust and fault
+tolerant with tunable reliability mechanisms and many failover and recovery mechanisms.” As you
+can see, Hadoop is mentioned nowhere, but Flume is typically used for ingesting data into Hadoop
+clusters.</p>
+
+<p><img src="https://blogs.apache.org/flume/mediaresource/ab0d50f6-a960-42cc-971e-3da38ba3adad" alt="png" /></p>
+
+<p>Flume has an extensible architecture. An instance of Flume, called an <em>agent</em>, can have multiple
+<em>channels</em>, with each having multiple <em>sources</em> and <em>sinks</em> of various types. Sources queue data
+in channels, which in turn write out data to sinks. Such <em>pipelines</em> can be chained together to
+create even more complex ones. There may be more than one agent and agents can be configured to
+support failover and recovery.</p>
+
+<p>Flume comes with a number of built-in channel, source, and sink types. The memory channel is the
+default (an in-memory queue with no persistence to disk), but other options such as Kafka- and
+file-based channels are also provided. As for sources, Avro, JMS, Thrift, and the spooling directory
+source are some of the built-in ones. Flume also ships with many sinks, including sinks for writing
+data to HDFS, HBase, Hive, and Kafka, as well as to other Flume agents.</p>
+
+<p>In the rest of this post I’ll go over the Kudu Flume sink and show you how to configure Flume to
+write ingested data to a Kudu table. The sink has been part of the Kudu distribution since the 0.8
+release and the source code can be found <a href="https://github.com/apache/kudu/tree/master/java/kudu-flume-sink">here</a>.</p>
+
+<h1 id="configuring-the-kudu-flume-sink">Configuring the Kudu Flume Sink</h1>
+<p>Here is a sample flume configuration file:</p>
+
+<pre><code>agent1.sources  = source1
+agent1.channels = channel1
+agent1.sinks = sink1
+
+agent1.sources.source1.type = exec
+agent1.sources.source1.command = /usr/bin/vmstat 1
+agent1.sources.source1.channels = channel1
+
+agent1.channels.channel1.type = memory
+agent1.channels.channel1.capacity = 10000
+agent1.channels.channel1.transactionCapacity = 1000
+
+agent1.sinks.sink1.type = org.apache.flume.sink.kudu.KuduSink
+agent1.sinks.sink1.masterAddresses = localhost
+agent1.sinks.sink1.tableName = stats
+agent1.sinks.sink1.channel = channel1
+agent1.sinks.sink1.batchSize = 50
+agent1.sinks.sink1.producer = org.apache.kudu.flume.sink.SimpleKuduEventProducer
+</code></pre>
+
+<p>We define a source called <code>source1</code> which simply executes a <code>vmstat</code> command to continuously generate
+virtual memory statistics for the machine and queues the resulting events into an in-memory <code>channel1</code> channel,
+which in turn is used for writing these events to a Kudu table called <code>stats</code>. We are using
+<code>org.apache.kudu.flume.sink.SimpleKuduEventProducer</code> as the producer. <code>SimpleKuduEventProducer</code> is
+the built-in and default producer, but it’s implemented as a showcase for how to write Flume
+events into Kudu tables. For any serious functionality we’d have to write a custom producer. We
+need to make this producer and the <code>KuduSink</code> class available to Flume. We can do that by simply
+copying the <code>kudu-flume-sink-&lt;VERSION&gt;.jar</code> file from the Kudu distribution to the
+<code>$FLUME_HOME/plugins.d/kudu-sink/lib</code> directory in the Flume installation. The jar file contains
+<code>KuduSink</code> and all of its dependencies (including the Kudu Java client classes).</p>
+
+<p>At a minimum, the Kudu Flume Sink needs to know where the Kudu masters are
+(<code>agent1.sinks.sink1.masterAddresses = localhost</code>) and which Kudu table Flume events should be
+written to (<code>agent1.sinks.sink1.tableName = stats</code>). The Kudu Flume Sink doesn’t create this
+table; it has to be created before the sink is started, for example with the Kudu client as sketched below.</p>
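+
+<p>As a rough sketch, the <code>stats</code> table could be created with the Kudu Java client along the
+following lines. The single BINARY key column named <code>payload</code> matches the default column written
+by <code>SimpleKuduEventProducer</code>; the exact client package names and partitioning options may vary
+with your Kudu version:</p>
+
+<pre><code>import java.util.Arrays;
+
+import org.apache.kudu.ColumnSchema;
+import org.apache.kudu.Schema;
+import org.apache.kudu.Type;
+import org.apache.kudu.client.CreateTableOptions;
+import org.apache.kudu.client.KuduClient;
+
+public class CreateStatsTable {
+  public static void main(String[] args) throws Exception {
+    // Same master address as agent1.sinks.sink1.masterAddresses above.
+    KuduClient client = new KuduClient.KuduClientBuilder("localhost").build();
+    try {
+      // One binary key column named "payload", matching the producer's default payloadColumn.
+      Schema schema = new Schema(Arrays.asList(
+          new ColumnSchema.ColumnSchemaBuilder("payload", Type.BINARY)
+              .key(true)
+              .build()));
+      // Kudu requires a partitioning scheme for new tables; hash partitioning on the key is the simplest.
+      CreateTableOptions options = new CreateTableOptions()
+          .addHashPartitions(Arrays.asList("payload"), 4);
+      client.createTable("stats", schema, options);
+    } finally {
+      client.shutdown();
+    }
+  }
+}
+</code></pre>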
+
+<p>You may also notice the <code>batchSize</code> parameter. The sink takes up to that many Flume events
+from the channel and flushes them to Kudu in one shot. Tuning <code>batchSize</code> properly can have a huge
+impact on the ingest performance of the Kudu cluster.</p>
+
+<p>Here is a complete list of KuduSink parameters:</p>
+
+<table>
+  <thead>
+    <tr>
+      <th>Parameter Name</th>
+      <th>Default</th>
+      <th>Description</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>masterAddresses</td>
+      <td>N/A</td>
+      <td>Comma-separated list of “host:port” pairs of the masters (port optional)</td>
+    </tr>
+    <tr>
+      <td>tableName</td>
+      <td>N/A</td>
+      <td>The name of the table in Kudu to write to</td>
+    </tr>
+    <tr>
+      <td>producer</td>
+      <td>org.apache.kudu.flume.sink.SimpleKuduEventProducer</td>
+      <td>The fully qualified class name of the Kudu event producer the sink should use</td>
+    </tr>
+    <tr>
+      <td>batchSize</td>
+      <td>100</td>
+      <td>Maximum number of events the sink should take from the channel per transaction, if available</td>
+    </tr>
+    <tr>
+      <td>timeoutMillis</td>
+      <td>30000</td>
+      <td>Timeout period for Kudu operations, in milliseconds</td>
+    </tr>
+    <tr>
+      <td>ignoreDuplicateRows</td>
+      <td>true</td>
+      <td>Whether to ignore errors indicating that we attempted to insert duplicate rows into Kudu</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>Let’s take a look at the source code for the built-in producer class:</p>
+
+<pre><code>public class SimpleKuduEventProducer implements KuduEventProducer {
+  private byte[] payload;
+  private KuduTable table;
+  private String payloadColumn;
+
+  public SimpleKuduEventProducer(){
+  }
+
+  @Override
+  public void configure(Context context) {
+    payloadColumn = context.getString("payloadColumn","payload");
+  }
+
+  @Override
+  public void configure(ComponentConfiguration conf) {
+  }
+
+  @Override
+  public void initialize(Event event, KuduTable table) {
+    this.payload = event.getBody();
+    this.table = table;
+  }
+
+  @Override
+  public List&lt;Operation&gt; getOperations() throws FlumeException {
+    try {
+      Insert insert = table.newInsert();
+      PartialRow row = insert.getRow();
+      row.addBinary(payloadColumn, payload);
+
+      return Collections.singletonList((Operation) insert);
+    } catch (Exception e){
+      throw new FlumeException("Failed to create Kudu Insert object!", e);
+    }
+  }
+
+  @Override
+  public void close() {
+  }
+}
+</code></pre>
+
+<p><code>SimpleKuduEventProducer</code> implements the <code>org.apache.kudu.flume.sink.KuduEventProducer</code> interface,
+which itself looks like this:</p>
+
+<pre><code>public interface KuduEventProducer extends Configurable, ConfigurableComponent {
+  /**
+   * Initialize the event producer.
+   * @param event to be written to Kudu
+   * @param table the KuduTable object used for creating Kudu Operation objects
+   */
+  void initialize(Event event, KuduTable table);
+
+  /**
+   * Get the operations that should be written out to Kudu as a result of this
+   * event. This list is written to Kudu using the Kudu client API.
+   * @return List of {@link org.kududb.client.Operation} which
+   * are written as such to Kudu
+   */
+  List&lt;Operation&gt; getOperations();
+
+  /*
+   * Clean up any state. This will be called when the sink is being stopped.
+   */
+  void close();
+}
+</code></pre>
+
+<p><code>public void configure(Context context)</code> is called when an instance of our producer is instantiated
+by the KuduSink. SimpleKuduEventProducer’s implementation looks for a producer parameter named
+<code>payloadColumn</code> and uses its value (“payload” if not overridden in the Flume configuration file) as the
+column which will hold the value of the Flume event payload. If you recall from above, we had
+configured the KuduSink to listen for events generated from the <code>vmstat</code> command. Each output row
+from that command will be stored as a new row containing a <code>payload</code> column in the <code>stats</code> table.
+<code>SimpleKuduEventProducer</code> does not have any other configuration parameters, but if it had any we would
+define them by prefixing them with <code>producer.</code> (<code>agent1.sinks.sink1.producer.parameter1</code> for
+example), as in the snippet below.</p>
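+
+<p>For instance, to store the event body in a column called <code>message</code> instead of the default
+<code>payload</code> column (a hypothetical column name, which the target table would of course need to have),
+the producer parameter could be set like this:</p>
+
+<pre><code>agent1.sinks.sink1.producer = org.apache.kudu.flume.sink.SimpleKuduEventProducer
+# Picked up by the producer's configure(Context) as payloadColumn
+agent1.sinks.sink1.producer.payloadColumn = message
+</code></pre>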
+
+<p>The main producer logic resides in the <code>public List&lt;Operation&gt; getOperations()</code> method. In
+SimpleKuduEventProducer’s implementation we simply insert the binary body of the Flume event into
+the Kudu table. Here we call Kudu’s <code>newInsert()</code> to initiate an insert, but we could have used
+<code>Upsert</code> if updating an existing row was also an option; in fact, there’s another producer
+implementation available for doing just that: <code>SimpleKeyedKuduEventProducer</code>. In the real world you
+will most probably need to write your own custom producer, but you can base your implementation
+on the built-in ones, along the lines of the sketch below.</p>
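+
+<p>As a rough sketch of such a custom producer, assume (hypothetically) that each event body is a
+comma-separated <code>host,metric,value</code> line and that the target Kudu table has matching <code>host</code>,
+<code>metric</code> and <code>value</code> columns. A producer along these lines could map each field to its own
+column; note that the Kudu client package names shown here may vary with your Kudu version:</p>
+
+<pre><code>import java.util.Collections;
+import java.util.List;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.FlumeException;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.kudu.client.Insert;
+import org.apache.kudu.client.KuduTable;
+import org.apache.kudu.client.Operation;
+import org.apache.kudu.client.PartialRow;
+import org.apache.kudu.flume.sink.KuduEventProducer;
+
+public class CsvMetricsKuduEventProducer implements KuduEventProducer {
+  private byte[] payload;
+  private KuduTable table;
+
+  @Override
+  public void configure(Context context) {
+    // No producer-specific parameters in this sketch.
+  }
+
+  @Override
+  public void configure(ComponentConfiguration conf) {
+  }
+
+  @Override
+  public void initialize(Event event, KuduTable table) {
+    this.payload = event.getBody();
+    this.table = table;
+  }
+
+  @Override
+  public List&lt;Operation&gt; getOperations() throws FlumeException {
+    try {
+      // Split the "host,metric,value" line and map each field to a column.
+      String[] fields = new String(payload, "UTF-8").split(",");
+      Insert insert = table.newInsert();
+      PartialRow row = insert.getRow();
+      row.addString("host", fields[0]);
+      row.addString("metric", fields[1]);
+      row.addLong("value", Long.parseLong(fields[2]));
+      return Collections.singletonList((Operation) insert);
+    } catch (Exception e) {
+      throw new FlumeException("Failed to create Kudu Insert from event!", e);
+    }
+  }
+
+  @Override
+  public void close() {
+  }
+}
+</code></pre>
+
+<p>The Flume configuration would then simply point <code>agent1.sinks.sink1.producer</code> at this class.</p>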
+
+<p>In the future, we plan to add more flexible event producer implementations so that creation of a
+custom event producer is not required to write data to Kudu. See
+<a href="https://gerrit.cloudera.org/#/c/4034/">here</a> for a work-in-progress generic event producer for
+Avro-encoded Events.</p>
+
+<h1 id="conclusion">Conclusion</h1>
+<p>Kudu is a scalable data store which lets us ingest insane amounts of data per second. Apache Flume
+helps us aggregate data from various sources, and the Kudu Flume Sink lets us easily store
+the aggregated Flume events into Kudu. Together they enable us to create a data warehouse out of
+disparate sources.</p>
+
+<p><em>Ara Abrahamian is a software engineer at Argyle Data building fraud detection systems using
+sophisticated machine learning methods. Ara is the original author of the Flume Kudu Sink that
+is included in the Kudu distribution. You can follow him on Twitter at @ara_e.</em></p>
+
+
+    
+  </div>
+  <div class="read-full">
+    <a class="btn btn-info" href="/2016/08/31/intro-flume-kudu-sink.html">Read full post...</a>
+  </div>
+</article>
+
+
+
+<!-- Articles -->
+<article>
+  <header>
     <h1 class="entry-title"><a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a></h1>
     <p class="meta">Posted 23 Aug 2016 by Dan Burkert</p>
   </header>
@@ -195,27 +507,6 @@
 
 
 
-<!-- Articles -->
-<article>
-  <header>
-    <h1 class="entry-title"><a href="/2016/07/26/weekly-update.html">Apache Kudu Weekly Update July 26, 2016</a></h1>
-    <p class="meta">Posted 26 Jul 2016 by Jean-Daniel Cryans</p>
-  </header>
-  <div class="entry-content">
-    
-    <p>Welcome to the eighteenth edition of the Kudu Weekly Update. This weekly blog post
-covers ongoing development and news in the Apache Kudu project.</p>
-
-
-    
-  </div>
-  <div class="read-full">
-    <a class="btn btn-info" href="/2016/07/26/weekly-update.html">Read full post...</a>
-  </div>
-</article>
-
-
-
 <!-- Pagination links -->
 
 <nav>
@@ -234,6 +525,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -262,8 +555,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/blog/page/2/index.html b/blog/page/2/index.html
index 7c342ec..472d5dc 100644
--- a/blog/page/2/index.html
+++ b/blog/page/2/index.html
@@ -111,6 +111,27 @@
 <!-- Articles -->
 <article>
   <header>
+    <h1 class="entry-title"><a href="/2016/07/26/weekly-update.html">Apache Kudu Weekly Update July 26, 2016</a></h1>
+    <p class="meta">Posted 26 Jul 2016 by Jean-Daniel Cryans</p>
+  </header>
+  <div class="entry-content">
+    
+    <p>Welcome to the eighteenth edition of the Kudu Weekly Update. This weekly blog post
+covers ongoing development and news in the Apache Kudu project.</p>
+
+
+    
+  </div>
+  <div class="read-full">
+    <a class="btn btn-info" href="/2016/07/26/weekly-update.html">Read full post...</a>
+  </div>
+</article>
+
+
+
+<!-- Articles -->
+<article>
+  <header>
     <h1 class="entry-title"><a href="/2016/07/25/asf-graduation.html">The Apache Software Foundation Announces Apache&reg; Kudu&trade; as a Top-Level Project</a></h1>
     <p class="meta">Posted 25 Jul 2016 by Jean-Daniel Cryans</p>
   </header>
@@ -203,27 +224,6 @@
 
 
 
-<!-- Articles -->
-<article>
-  <header>
-    <h1 class="entry-title"><a href="/2016/06/27/weekly-update.html">Apache Kudu (incubating) Weekly Update June 27, 2016</a></h1>
-    <p class="meta">Posted 27 Jun 2016 by Todd Lipcon</p>
-  </header>
-  <div class="entry-content">
-    
-    <p>Welcome to the fifteenth edition of the Kudu Weekly Update. This weekly blog post
-covers ongoing development and news in the Apache Kudu (incubating) project.</p>
-
-
-    
-  </div>
-  <div class="read-full">
-    <a class="btn btn-info" href="/2016/06/27/weekly-update.html">Read full post...</a>
-  </div>
-</article>
-
-
-
 <!-- Pagination links -->
 
 <nav>
@@ -244,6 +244,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -272,8 +274,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/blog/page/3/index.html b/blog/page/3/index.html
index 1d9b1b2..841a385 100644
--- a/blog/page/3/index.html
+++ b/blog/page/3/index.html
@@ -111,13 +111,34 @@
 <!-- Articles -->
 <article>
   <header>
+    <h1 class="entry-title"><a href="/2016/06/27/weekly-update.html">Apache Kudu (incubating) Weekly Update June 27, 2016</a></h1>
+    <p class="meta">Posted 27 Jun 2016 by Todd Lipcon</p>
+  </header>
+  <div class="entry-content">
+    
+    <p>Welcome to the fifteenth edition of the Kudu Weekly Update. This weekly blog post
+covers ongoing development and news in the Apache Kudu (incubating) project.</p>
+
+
+    
+  </div>
+  <div class="read-full">
+    <a class="btn btn-info" href="/2016/06/27/weekly-update.html">Read full post...</a>
+  </div>
+</article>
+
+
+
+<!-- Articles -->
+<article>
+  <header>
     <h1 class="entry-title"><a href="/2016/06/24/multi-master-1-0-0.html">Master fault tolerance in Kudu 1.0</a></h1>
     <p class="meta">Posted 24 Jun 2016 by Adar Dembo</p>
   </header>
   <div class="entry-content">
     
     <p>This blog post describes how the 1.0 release of Apache Kudu (incubating) will
-support fault tolerance for the Kudu master, finally eliminating Kudu&#8217;s last
+support fault tolerance for the Kudu master, finally eliminating Kudu’s last
 single point of failure.</p>
 
 
@@ -196,37 +217,6 @@
 
 
 
-<!-- Articles -->
-<article>
-  <header>
-    <h1 class="entry-title"><a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a></h1>
-    <p class="meta">Posted 10 Jun 2016 by Jean-Daniel Cryans</p>
-  </header>
-  <div class="entry-content">
-    
-    <p>The Apache Kudu (incubating) team is happy to announce the release of Kudu
-0.9.0!</p>
-
-<p>This latest version adds basic UPSERT functionality and an improved Apache Spark Data Source
-that doesn&#8217;t rely on the MapReduce I/O formats. It also improves Tablet Server
-restart time as well as write performance under high load. Finally, Kudu now enforces
-the specification of a partitioning scheme for new tables.</p>
-
-<ul>
-  <li>Read the detailed <a href="http://kudu.apache.org/releases/0.9.0/docs/release_notes.html">Kudu 0.9.0 release notes</a></li>
-  <li>Download the <a href="http://kudu.apache.org/releases/0.9.0/">Kudu 0.9.0 source release</a></li>
-</ul>
-
-
-    
-  </div>
-  <div class="read-full">
-    <a class="btn btn-info" href="/2016/06/10/apache-kudu-0-9-0-released.html">Read full post...</a>
-  </div>
-</article>
-
-
-
 <!-- Pagination links -->
 
 <nav>
@@ -247,6 +237,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -275,8 +267,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/blog/page/4/index.html b/blog/page/4/index.html
index b5df952..9a36dc8 100644
--- a/blog/page/4/index.html
+++ b/blog/page/4/index.html
@@ -111,6 +111,37 @@
 <!-- Articles -->
 <article>
   <header>
+    <h1 class="entry-title"><a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a></h1>
+    <p class="meta">Posted 10 Jun 2016 by Jean-Daniel Cryans</p>
+  </header>
+  <div class="entry-content">
+    
+    <p>The Apache Kudu (incubating) team is happy to announce the release of Kudu
+0.9.0!</p>
+
+<p>This latest version adds basic UPSERT functionality and an improved Apache Spark Data Source
+that doesn’t rely on the MapReduce I/O formats. It also improves Tablet Server
+restart time as well as write performance under high load. Finally, Kudu now enforces
+the specification of a partitioning scheme for new tables.</p>
+
+<ul>
+  <li>Read the detailed <a href="http://kudu.apache.org/releases/0.9.0/docs/release_notes.html">Kudu 0.9.0 release notes</a></li>
+  <li>Download the <a href="http://kudu.apache.org/releases/0.9.0/">Kudu 0.9.0 source release</a></li>
+</ul>
+
+
+    
+  </div>
+  <div class="read-full">
+    <a class="btn btn-info" href="/2016/06/10/apache-kudu-0-9-0-released.html">Read full post...</a>
+  </div>
+</article>
+
+
+
+<!-- Articles -->
+<article>
+  <header>
     <h1 class="entry-title"><a href="/2016/06/06/weekly-update.html">Apache Kudu (incubating) Weekly Update June 6, 2016</a></h1>
     <p class="meta">Posted 06 Jun 2016 by Jean-Daniel Cryans</p>
   </header>
@@ -194,27 +225,6 @@
 
 
 
-<!-- Articles -->
-<article>
-  <header>
-    <h1 class="entry-title"><a href="/2016/05/16/weekly-update.html">Apache Kudu (incubating) Weekly Update May 16, 2016</a></h1>
-    <p class="meta">Posted 16 May 2016 by Todd Lipcon</p>
-  </header>
-  <div class="entry-content">
-    
-    <p>Welcome to the ninth edition of the Kudu Weekly Update. This weekly blog post
-covers ongoing development and news in the Apache Kudu (incubating) project.</p>
-
-
-    
-  </div>
-  <div class="read-full">
-    <a class="btn btn-info" href="/2016/05/16/weekly-update.html">Read full post...</a>
-  </div>
-</article>
-
-
-
 <!-- Pagination links -->
 
 <nav>
@@ -235,6 +245,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -263,8 +275,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/blog/page/5/index.html b/blog/page/5/index.html
index 633ddd3..f35c743 100644
--- a/blog/page/5/index.html
+++ b/blog/page/5/index.html
@@ -111,6 +111,27 @@
 <!-- Articles -->
 <article>
   <header>
+    <h1 class="entry-title"><a href="/2016/05/16/weekly-update.html">Apache Kudu (incubating) Weekly Update May 16, 2016</a></h1>
+    <p class="meta">Posted 16 May 2016 by Todd Lipcon</p>
+  </header>
+  <div class="entry-content">
+    
+    <p>Welcome to the ninth edition of the Kudu Weekly Update. This weekly blog post
+covers ongoing development and news in the Apache Kudu (incubating) project.</p>
+
+
+    
+  </div>
+  <div class="read-full">
+    <a class="btn btn-info" href="/2016/05/16/weekly-update.html">Read full post...</a>
+  </div>
+</article>
+
+
+
+<!-- Articles -->
+<article>
+  <header>
     <h1 class="entry-title"><a href="/2016/05/09/weekly-update.html">Apache Kudu (incubating) Weekly Update May 9, 2016</a></h1>
     <p class="meta">Posted 09 May 2016 by Jean-Daniel Cryans</p>
   </header>
@@ -158,7 +179,7 @@
   </header>
   <div class="entry-content">
     
-    <p>Recently, I wanted to stress-test and benchmark some changes to the Kudu RPC server, and decided to use YCSB as a way to generate reasonable load. While running YCSB, I noticed interesting results, and what started as an unrelated testing exercise eventually yielded some new insights into Kudu&#8217;s behavior. These insights will motivate changes to default Kudu settings and code in upcoming versions. This post details the benchmark setup, analysis, and conclusions.</p>
+    <p>Recently, I wanted to stress-test and benchmark some changes to the Kudu RPC server, and decided to use YCSB as a way to generate reasonable load. While running YCSB, I noticed interesting results, and what started as an unrelated testing exercise eventually yielded some new insights into Kudu’s behavior. These insights will motivate changes to default Kudu settings and code in upcoming versions. This post details the benchmark setup, analysis, and conclusions.</p>
 
 
     
@@ -191,29 +212,6 @@
 
 
 
-<!-- Articles -->
-<article>
-  <header>
-    <h1 class="entry-title"><a href="/2016/04/19/kudu-0-8-0-predicate-improvements.html">Predicate Improvements in Kudu 0.8</a></h1>
-    <p class="meta">Posted 19 Apr 2016 by Dan Burkert</p>
-  </header>
-  <div class="entry-content">
-    
-    <p>The recently released Kudu version 0.8 ships with a host of new improvements to
-scan predicates. Performance and usability have been improved, especially for
-tables taking advantage of <a href="http://kudu.apache.org/docs/schema_design.html#data-distribution">advanced partitioning
-options</a>.</p>
-
-
-    
-  </div>
-  <div class="read-full">
-    <a class="btn btn-info" href="/2016/04/19/kudu-0-8-0-predicate-improvements.html">Read full post...</a>
-  </div>
-</article>
-
-
-
 <!-- Pagination links -->
 
 <nav>
@@ -234,6 +232,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -262,8 +262,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/blog/page/6/index.html b/blog/page/6/index.html
index 32e0626..7e526c2 100644
--- a/blog/page/6/index.html
+++ b/blog/page/6/index.html
@@ -111,6 +111,29 @@
 <!-- Articles -->
 <article>
   <header>
+    <h1 class="entry-title"><a href="/2016/04/19/kudu-0-8-0-predicate-improvements.html">Predicate Improvements in Kudu 0.8</a></h1>
+    <p class="meta">Posted 19 Apr 2016 by Dan Burkert</p>
+  </header>
+  <div class="entry-content">
+    
+    <p>The recently released Kudu version 0.8 ships with a host of new improvements to
+scan predicates. Performance and usability have been improved, especially for
+tables taking advantage of <a href="http://kudu.apache.org/docs/schema_design.html#data-distribution">advanced partitioning
+options</a>.</p>
+
+
+    
+  </div>
+  <div class="read-full">
+    <a class="btn btn-info" href="/2016/04/19/kudu-0-8-0-predicate-improvements.html">Read full post...</a>
+  </div>
+</article>
+
+
+
+<!-- Articles -->
+<article>
+  <header>
     <h1 class="entry-title"><a href="/2016/04/18/weekly-update.html">Apache Kudu (incubating) Weekly Update April 18, 2016</a></h1>
     <p class="meta">Posted 18 Apr 2016 by Todd Lipcon</p>
   </header>
@@ -211,27 +234,6 @@
 
 
 
-<!-- Articles -->
-<article>
-  <header>
-    <h1 class="entry-title"><a href="/2016/04/04/weekly-update.html">Apache Kudu (incubating) Weekly Update April 4, 2016</a></h1>
-    <p class="meta">Posted 04 Apr 2016 by Todd Lipcon</p>
-  </header>
-  <div class="entry-content">
-    
-    <p>Welcome to the third edition of the Kudu Weekly Update. This weekly blog post
-covers ongoing development and news in the Apache Kudu (incubating) project.</p>
-
-
-    
-  </div>
-  <div class="read-full">
-    <a class="btn btn-info" href="/2016/04/04/weekly-update.html">Read full post...</a>
-  </div>
-</article>
-
-
-
 <!-- Pagination links -->
 
 <nav>
@@ -252,6 +254,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -280,8 +284,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/blog/page/7/index.html b/blog/page/7/index.html
index 690ab6b..7f46619 100644
--- a/blog/page/7/index.html
+++ b/blog/page/7/index.html
@@ -111,13 +111,34 @@
 <!-- Articles -->
 <article>
   <header>
+    <h1 class="entry-title"><a href="/2016/04/04/weekly-update.html">Apache Kudu (incubating) Weekly Update April 4, 2016</a></h1>
+    <p class="meta">Posted 04 Apr 2016 by Todd Lipcon</p>
+  </header>
+  <div class="entry-content">
+    
+    <p>Welcome to the third edition of the Kudu Weekly Update. This weekly blog post
+covers ongoing development and news in the Apache Kudu (incubating) project.</p>
+
+
+    
+  </div>
+  <div class="read-full">
+    <a class="btn btn-info" href="/2016/04/04/weekly-update.html">Read full post...</a>
+  </div>
+</article>
+
+
+
+<!-- Articles -->
+<article>
+  <header>
     <h1 class="entry-title"><a href="/2016/03/28/weekly-update.html">Apache Kudu (incubating) Weekly Update March 28, 2016</a></h1>
     <p class="meta">Posted 28 Mar 2016 by Todd Lipcon</p>
   </header>
   <div class="entry-content">
     
-    <p>Welcome to the second edition of the Kudu Weekly Update. As with last week&#8217;s
-inaugural post, we&#8217;ll cover ongoing development and news in the Apache Kudu
+    <p>Welcome to the second edition of the Kudu Weekly Update. As with last week’s
+inaugural post, we’ll cover ongoing development and news in the Apache Kudu
 project on a weekly basis.</p>
 
 
@@ -138,13 +159,13 @@
   </header>
   <div class="entry-content">
     
-    <p>Kudu is a fast-moving young open source project, and we&#8217;ve heard from a few
-members of the community that it can be difficult to keep track of what&#8217;s
+    <p>Kudu is a fast-moving young open source project, and we’ve heard from a few
+members of the community that it can be difficult to keep track of what’s
 going on day-to-day. A typical month comprises 80-100 individual patches
 committed and hundreds of code review and discussion
 emails. So, inspired by similar weekly newsletters like
-<a href="http://llvmweekly.org/">LLVM Weekly</a> and <a href="http://lwn.net/Kernel/">LWN&#8217;s weekly kernel coverage</a>
-we&#8217;re going to experiment with our own weekly newsletter covering
+<a href="http://llvmweekly.org/">LLVM Weekly</a> and <a href="http://lwn.net/Kernel/">LWN’s weekly kernel coverage</a>
+we’re going to experiment with our own weekly newsletter covering
 recent development and Kudu-related news.</p>
 
 
@@ -223,6 +244,8 @@
     <h3>Recent posts</h3>
     <ul>
     
+      <li> <a href="/2016/08/31/intro-flume-kudu-sink.html">An Introduction to Kudu Flume Sink</a> </li>
+    
       <li> <a href="/2016/08/23/new-range-partitioning-features.html">New Range Partitioning Features in Kudu 0.10</a> </li>
     
       <li> <a href="/2016/08/23/apache-kudu-0-10-0-released.html">Apache Kudu 0.10.0 released</a> </li>
@@ -251,8 +274,6 @@
     
       <li> <a href="/2016/06/13/weekly-update.html">Apache Kudu (incubating) Weekly Update June 13, 2016</a> </li>
     
-      <li> <a href="/2016/06/10/apache-kudu-0-9-0-released.html">Apache Kudu (incubating) 0.9.0 released</a> </li>
-    
     </ul>
   </div>
 </div>
diff --git a/feed.xml b/feed.xml
index 222da78..6800739 100644
--- a/feed.xml
+++ b/feed.xml
@@ -1,4 +1,588 @@
-<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"><generator uri="http://jekyllrb.com" version="2.5.3">Jekyll</generator><link href="/feed.xml" rel="self" type="application/atom+xml" /><link href="/" rel="alternate" type="text/html" /><updated>2016-08-29T11:21:53-07:00</updated><id>/</id><entry><title>New Range Partitioning Features in Kudu 0.10</title><link href="/2016/08/23/new-range-partitioning-features.html" rel="alternate" type="text/html" title="New Range Partitioning Features in Kudu 0.10" /><published>2016-08-23T00:00:00-07:00</published><updated>2016-08-23T00:00:00-07:00</updated><id>/2016/08/23/new-range-partitioning-features</id><content type="html" xml:base="/2016/08/23/new-range-partitioning-features.html">&lt;p&gt;Kudu 0.10 is shipping with a few important new features for range partitioning.
+<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"><generator uri="http://jekyllrb.com" version="2.5.3">Jekyll</generator><link href="/feed.xml" rel="self" type="application/atom+xml" /><link href="/" rel="alternate" type="text/html" /><updated>2016-08-31T09:22:20-07:00</updated><id>/</id><entry><title>An Introduction to Kudu Flume Sink</title><link href="/2016/08/31/intro-flume-kudu-sink.html" rel="alternate" type="text/html" title="An Introduction to Kudu Flume Sink" /><published>2016-08-31T00:00:00-07:00</published><updated>2016-08-31T00:00:00-07:00</updated><id>/2016/08/31/intro-flume-kudu-sink</id><content type="html" xml:base="/2016/08/31/intro-flume-kudu-sink.html">&lt;p&gt;This post discusses the Kudu Flume Sink. First, I’ll give some background on why we considered
+using Kudu, what Flume does for us, and how Flume fits with Kudu in our project.&lt;/p&gt;
+
+&lt;h1 id=&quot;why-kudu&quot;&gt;Why Kudu&lt;/h1&gt;
+&lt;p&gt;Traditionally in the Hadoop ecosystem we’ve dealt with various &lt;em&gt;batch processing&lt;/em&gt; technologies such
+as MapReduce and the many libraries and tools built on top of it in various languages (Apache Pig,
+Apache Hive, Apache Oozie and many others). The main problem with this approach is that it needs to
+process the whole data set in batches, again and again, as soon as new data gets added. Things get
+really complicated when a few such tasks need to get chained together, or when the same data set
+needs to be processed in various ways by different jobs, while all compete for the shared cluster
+resources.&lt;/p&gt;
+
+&lt;p&gt;The opposite of this approach is &lt;em&gt;stream processing&lt;/em&gt;: process the data as soon as it arrives, not
+in batches. Streaming systems such as Spark Streaming, Storm, Kafka Streams, and many others make
+this possible. But writing streaming services is not trivial. The streaming systems are becoming
+more and more capable and support more complex constructs, but they are not yet easy to use. All
+queries and processes need to be carefully planned and implemented.&lt;/p&gt;
+
+&lt;p&gt;To summarize, &lt;em&gt;batch processing&lt;/em&gt; is:&lt;/p&gt;
+
+&lt;ul&gt;
+  &lt;li&gt;file-based&lt;/li&gt;
+  &lt;li&gt;a paradigm that processes large chunks of data as a group&lt;/li&gt;
+  &lt;li&gt;high latency and high throughput, both for ingest and query&lt;/li&gt;
+  &lt;li&gt;typically easy to program, but hard to orchestrate&lt;/li&gt;
+  &lt;li&gt;well suited for writing ad-hoc queries, although they are typically high latency&lt;/li&gt;
+&lt;/ul&gt;
+
+&lt;p&gt;While &lt;em&gt;stream processing&lt;/em&gt; is:&lt;/p&gt;
+
+&lt;ul&gt;
+  &lt;li&gt;a totally different paradigm, which involves single events and time windows instead of large groups of events&lt;/li&gt;
+  &lt;li&gt;still file-based and not a long-term database&lt;/li&gt;
+  &lt;li&gt;not batch-oriented, but incremental&lt;/li&gt;
+  &lt;li&gt;ultra-fast ingest and ultra-fast query (query results basically pre-calculated)&lt;/li&gt;
+  &lt;li&gt;not so easy to program, relatively easy to orchestrate&lt;/li&gt;
+  &lt;li&gt;impossible to write ad-hoc queries&lt;/li&gt;
+&lt;/ul&gt;
+
+&lt;p&gt;And a Kudu-based &lt;em&gt;near real-time&lt;/em&gt; approach is:&lt;/p&gt;
+
+&lt;ul&gt;
+  &lt;li&gt;flexible and expressive, thanks to SQL support via Apache Impala (incubating)&lt;/li&gt;
+  &lt;li&gt;a table-oriented, mutable data store that feels like a traditional relational database&lt;/li&gt;
+  &lt;li&gt;very easy to program, you can even pretend it’s good old MySQL&lt;/li&gt;
+  &lt;li&gt;low-latency and relatively high throughput, both for ingest and query&lt;/li&gt;
+&lt;/ul&gt;
+
+&lt;p&gt;At Argyle Data, we’re dealing with complex fraud detection scenarios. We need to ingest massive
+amounts of data, run machine learning algorithms and generate reports. When we created our current
+architecture two years ago we decided to opt for a database as the backbone of our system. That
+database is Apache Accumulo. It’s a key-value based database which runs on top of Hadoop HDFS,
+quite similar to HBase but with some important improvements such as cell level security and ease
+of deployment and management. To enable querying of this data for quite complex reporting and
+analytics, we used Presto, a distributed query engine with a pluggable architecture open-sourced
+by Facebook. We wrote a connector for it to let it run queries against the Accumulo database. This
+architecture has served us well, but there were a few problems:&lt;/p&gt;
+
+&lt;ul&gt;
+  &lt;li&gt;we need to ingest even more massive volumes of data in real-time&lt;/li&gt;
+  &lt;li&gt;we need to perform complex machine-learning calculations on even larger data-sets&lt;/li&gt;
+  &lt;li&gt;we need to support ad-hoc queries, plus long-term data warehouse functionality&lt;/li&gt;
+&lt;/ul&gt;
+
+&lt;p&gt;So, we’ve started gradually moving the core machine-learning pipeline to a streaming based
+solution. This way we can ingest and process larger data-sets faster in the real-time. But then how
+would we take care of ad-hoc queries and long-term persistence? This is where Kudu comes in. While
+the machine learning pipeline ingests and processes real-time data, we store a copy of the same
+ingested data in Kudu for long-term access and ad-hoc queries. Kudu is our &lt;em&gt;data warehouse&lt;/em&gt;. By
+using Kudu and Impala, we can retire our in-house Presto connector and rely on Impala’s
+super-fast query engine.&lt;/p&gt;
+
+&lt;p&gt;But how would we make sure data is reliably ingested into the streaming pipeline &lt;em&gt;and&lt;/em&gt; the
+Kudu-based data warehouse? This is where Apache Flume comes in.&lt;/p&gt;
+
+&lt;h1 id=&quot;why-flume&quot;&gt;Why Flume&lt;/h1&gt;
+&lt;p&gt;According to their &lt;a href=&quot;http://flume.apache.org/&quot;&gt;website&lt;/a&gt; “Flume is a distributed, reliable, and
+available service for efficiently collecting, aggregating, and moving large amounts of log data.
+It has a simple and flexible architecture based on streaming data flows. It is robust and fault
+tolerant with tunable reliability mechanisms and many failover and recovery mechanisms.” As you
+can see, nowhere is Hadoop mentioned but Flume is typically used for ingesting data to Hadoop
+clusters.&lt;/p&gt;
+
+&lt;p&gt;&lt;img src=&quot;https://blogs.apache.org/flume/mediaresource/ab0d50f6-a960-42cc-971e-3da38ba3adad&quot; alt=&quot;png&quot; /&gt;&lt;/p&gt;
+
+&lt;p&gt;Flume has an extensible architecture. An instance of Flume, called an &lt;em&gt;agent&lt;/em&gt;, can have multiple
+&lt;em&gt;channels&lt;/em&gt;, with each having multiple &lt;em&gt;sources&lt;/em&gt; and &lt;em&gt;sinks&lt;/em&gt; of various types. Sources queue data
+in channels, which in turn write out data to sinks. Such &lt;em&gt;pipelines&lt;/em&gt; can be chained together to
+create even more complex ones. There may be more than one agent and agents can be configured to
+support failover and recovery.&lt;/p&gt;
+
+&lt;p&gt;Flume comes with a bunch of built-in types of channels, sources and sinks. Memory channel is the
+default (an in-memory queue with no persistence to disk), but other options such as Kafka- and
+File-based channels are also provided. As for the sources, Avro, JMS, Thrift, spooling directory
+source are some of the built-in ones. Flume also ships with many sinks, including sinks for writing
+data to HDFS, HBase, Hive, Kafka, as well as to other Flume agents.&lt;/p&gt;
+
+&lt;p&gt;In the rest of this post I’ll go over the Kudu Flume sink and show you how to configure Flume to
+write ingested data to a Kudu table. The sink has been part of the Kudu distribution since the 0.8
+release and the source code can be found &lt;a href=&quot;https://github.com/apache/kudu/tree/master/java/kudu-flume-sink&quot;&gt;here&lt;/a&gt;.&lt;/p&gt;
+
+&lt;h1 id=&quot;configuring-the-kudu-flume-sink&quot;&gt;Configuring the Kudu Flume Sink&lt;/h1&gt;
+&lt;p&gt;Here is a sample flume configuration file:&lt;/p&gt;
+
+&lt;pre&gt;&lt;code&gt;agent1.sources  = source1
+agent1.channels = channel1
+agent1.sinks = sink1
+
+agent1.sources.source1.type = exec
+agent1.sources.source1.command = /usr/bin/vmstat 1
+agent1.sources.source1.channels = channel1
+
+agent1.channels.channel1.type = memory
+agent1.channels.channel1.capacity = 10000
+agent1.channels.channel1.transactionCapacity = 1000
+
+agent1.sinks.sink1.type = org.apache.flume.sink.kudu.KuduSink
+agent1.sinks.sink1.masterAddresses = localhost
+agent1.sinks.sink1.tableName = stats
+agent1.sinks.sink1.channel = channel1
+agent1.sinks.sink1.batchSize = 50
+agent1.sinks.sink1.producer = org.apache.kudu.flume.sink.SimpleKuduEventProducer
+&lt;/code&gt;&lt;/pre&gt;
+
+&lt;p&gt;We define a source called &lt;code&gt;source1&lt;/code&gt; which simply executes a &lt;code&gt;vmstat&lt;/code&gt; command to continuously generate
+virtual memory statistics for the machine and queue events into an in-memory &lt;code&gt;channel1&lt;/code&gt; channel,
+which in turn is used for writing these events to a Kudu table called &lt;code&gt;stats&lt;/code&gt;. We are using
+&lt;code&gt;org.apache.kudu.flume.sink.SimpleKuduEventProducer&lt;/code&gt; as the producer. &lt;code&gt;SimpleKuduEventProducer&lt;/code&gt; is
+the built-in and default producer, but it’s implemented as a showcase for how to write Flume
+events into Kudu tables. For any serious functionality we’d have to write a custom producer. We
+need to make this producer and the &lt;code&gt;KuduSink&lt;/code&gt; class available to Flume. We can do that by simply
+copying the &lt;code&gt;kudu-flume-sink-&amp;lt;VERSION&amp;gt;.jar&lt;/code&gt; file from the Kudu distribution to the
+&lt;code&gt;$FLUME_HOME/plugins.d/kudu-sink/lib&lt;/code&gt; directory in the Flume installation. The jar file contains
+&lt;code&gt;KuduSink&lt;/code&gt; and all of its dependencies (including Kudu java client classes).&lt;/p&gt;
+
+&lt;p&gt;At a minimum, the Kudu Flume Sink needs to know where the Kudu masters are
+(&lt;code&gt;agent1.sinks.sink1.masterAddresses = localhost&lt;/code&gt;) and which Kudu table should be used for writing
+Flume events to (&lt;code&gt;agent1.sinks.sink1.tableName = stats&lt;/code&gt;). The Kudu Flume Sink doesn’t create this
+table; it has to be created before the Kudu Flume Sink is started.&lt;/p&gt;
+
+&lt;p&gt;You may also notice the &lt;code&gt;batchSize&lt;/code&gt; parameter. Batch size is used for batching up to that many
+Flume events and flushing the entire batch in one shot. Tuning batchSize properly can have a huge
+impact on ingest performance of the Kudu cluster.&lt;/p&gt;
+
+&lt;p&gt;Here is a complete list of KuduSink parameters:&lt;/p&gt;
+
+&lt;table&gt;
+  &lt;thead&gt;
+    &lt;tr&gt;
+      &lt;th&gt;Parameter Name&lt;/th&gt;
+      &lt;th&gt;Default&lt;/th&gt;
+      &lt;th&gt;Description&lt;/th&gt;
+    &lt;/tr&gt;
+  &lt;/thead&gt;
+  &lt;tbody&gt;
+    &lt;tr&gt;
+      &lt;td&gt;masterAddresses&lt;/td&gt;
+      &lt;td&gt;N/A&lt;/td&gt;
+      &lt;td&gt;Comma-separated list of “host:port” pairs of the masters (port optional)&lt;/td&gt;
+    &lt;/tr&gt;
+    &lt;tr&gt;
+      &lt;td&gt;tableName&lt;/td&gt;
+      &lt;td&gt;N/A&lt;/td&gt;
+      &lt;td&gt;The name of the table in Kudu to write to&lt;/td&gt;
+    &lt;/tr&gt;
+    &lt;tr&gt;
+      &lt;td&gt;producer&lt;/td&gt;
+      &lt;td&gt;org.apache.kudu.flume.sink.SimpleKuduEventProducer&lt;/td&gt;
+      &lt;td&gt;The fully qualified class name of the Kudu event producer the sink should use&lt;/td&gt;
+    &lt;/tr&gt;
+    &lt;tr&gt;
+      &lt;td&gt;batchSize&lt;/td&gt;
+      &lt;td&gt;100&lt;/td&gt;
+      &lt;td&gt;Maximum number of events the sink should take from the channel per transaction, if available&lt;/td&gt;
+    &lt;/tr&gt;
+    &lt;tr&gt;
+      &lt;td&gt;timeoutMillis&lt;/td&gt;
+      &lt;td&gt;30000&lt;/td&gt;
+      &lt;td&gt;Timeout period for Kudu operations, in milliseconds&lt;/td&gt;
+    &lt;/tr&gt;
+    &lt;tr&gt;
+      &lt;td&gt;ignoreDuplicateRows&lt;/td&gt;
+      &lt;td&gt;true&lt;/td&gt;
+      &lt;td&gt;Whether to ignore errors indicating that we attempted to insert duplicate rows into Kudu&lt;/td&gt;
+    &lt;/tr&gt;
+  &lt;/tbody&gt;
+&lt;/table&gt;
+
+&lt;p&gt;Let’s take a look at the source code for the built-in producer class:&lt;/p&gt;
+
+&lt;pre&gt;&lt;code&gt;public class SimpleKuduEventProducer implements KuduEventProducer {
+  private byte[] payload;
+  private KuduTable table;
+  private String payloadColumn;
+
+  public SimpleKuduEventProducer(){
+  }
+
+  @Override
+  public void configure(Context context) {
+    payloadColumn = context.getString(&quot;payloadColumn&quot;,&quot;payload&quot;);
+  }
+
+  @Override
+  public void configure(ComponentConfiguration conf) {
+  }
+
+  @Override
+  public void initialize(Event event, KuduTable table) {
+    this.payload = event.getBody();
+    this.table = table;
+  }
+
+  @Override
+  public List&amp;lt;Operation&amp;gt; getOperations() throws FlumeException {
+    try {
+      Insert insert = table.newInsert();
+      PartialRow row = insert.getRow();
+      row.addBinary(payloadColumn, payload);
+
+      return Collections.singletonList((Operation) insert);
+    } catch (Exception e){
+      throw new FlumeException(&quot;Failed to create Kudu Insert object!&quot;, e);
+    }
+  }
+
+  @Override
+  public void close() {
+  }
+}
+&lt;/code&gt;&lt;/pre&gt;
+
+&lt;p&gt;&lt;code&gt;SimpleKuduEventProducer&lt;/code&gt; implements the &lt;code&gt;org.apache.kudu.flume.sink.KuduEventProducer&lt;/code&gt; interface,
+which itself looks like this:&lt;/p&gt;
+
+&lt;pre&gt;&lt;code&gt;public interface KuduEventProducer extends Configurable, ConfigurableComponent {
+  /**
+   * Initialize the event producer.
+   * @param event to be written to Kudu
+   * @param table the KuduTable object used for creating Kudu Operation objects
+   */
+  void initialize(Event event, KuduTable table);
+
+  /**
+   * Get the operations that should be written out to Kudu as a result of this
+   * event. This list is written to Kudu using the Kudu client API.
+   * @return List of {@link org.kududb.client.Operation} which
+   * are written as such to Kudu
+   */
+  List&amp;lt;Operation&amp;gt; getOperations();
+
+  /*
+   * Clean up any state. This will be called when the sink is being stopped.
+   */
+  void close();
+}
+&lt;/code&gt;&lt;/pre&gt;
+
+&lt;p&gt;&lt;code&gt;public void configure(Context context)&lt;/code&gt; is called when an instance of our producer is instantiated
+by the KuduSink. SimpleKuduEventProducer’s implementation looks for a producer parameter named
+&lt;code&gt;payloadColumn&lt;/code&gt; and uses its value (“payload” if not overridden in Flume configuration file) as the
+column which will hold the value of the Flume event payload. If you recall from above, we had
+configured the KuduSink to listen for events generated from the &lt;code&gt;vmstat&lt;/code&gt; command. Each output row
+from that command will be stored as a new row containing a &lt;code&gt;payload&lt;/code&gt; column in the &lt;code&gt;stats&lt;/code&gt; table.
+&lt;code&gt;SimpleKuduEventProducer&lt;/code&gt; does not have any other configuration parameters, but if it had any we would
+define them by prefixing them with &lt;code&gt;producer.&lt;/code&gt; (&lt;code&gt;agent1.sinks.sink1.producer.parameter1&lt;/code&gt; for
+example).&lt;/p&gt;
+
+&lt;p&gt;The main producer logic resides in the &lt;code&gt;public List&amp;lt;Operation&amp;gt; getOperations()&lt;/code&gt; method. In
+SimpleKuduEventProducer’s implementation we simply insert the binary body of the Flume event into
+the Kudu table. Here we call Kudu’s &lt;code&gt;newInsert()&lt;/code&gt; to initiate an insert, but could have used
+&lt;code&gt;Upsert&lt;/code&gt; if updating an existing row was also an option; in fact, there’s another producer
+implementation available for doing just that: &lt;code&gt;SimpleKeyedKuduEventProducer&lt;/code&gt;. Most probably you
+will need to write your own custom producer in the real world, but you can base your implementation
+on the built-in ones.&lt;/p&gt;
+
+&lt;p&gt;In the future, we plan to add more flexible event producer implementations so that creation of a
+custom event producer is not required to write data to Kudu. See
+&lt;a href=&quot;https://gerrit.cloudera.org/#/c/4034/&quot;&gt;here&lt;/a&gt; for a work-in-progress generic event producer for
+Avro-encoded Events.&lt;/p&gt;
+
+&lt;h1 id=&quot;conclusion&quot;&gt;Conclusion&lt;/h1&gt;
+&lt;p&gt;Kudu is a scalable data store which lets us ingest insane amounts of data per second. Apache Flume
+helps us aggregate data from various sources, and the Kudu Flume Sink lets us easily store
+the aggregated Flume events into Kudu. Together they enable us to create a data warehouse out of
+disparate sources.&lt;/p&gt;
+
+&lt;p&gt;&lt;em&gt;Ara Abrahamian is a software engineer at Argyle Data building fraud detection systems using
+sophisticated machine learning methods. Ara is the original author of the Flume Kudu Sink that
+is included in the Kudu distribution. You can follow him on Twitter at @ara_e.&lt;/em&gt;&lt;/p&gt;</content><author><name>Ara Abrahamian</name></author><summary>This post discusses the Kudu Flume Sink. First, I’ll give some background on why we considered
+using Kudu, what Flume does for us, and how Flume fits with Kudu in our project.
+
+Why Kudu
+Traditionally in the Hadoop ecosystem we’ve dealt with various batch processing technologies such
+as MapReduce and the many libraries and tools built on top of it in various languages (Apache Pig,
+Apache Hive, Apache Oozie and many others). The main problem with this approach is that it needs to
+process the whole data set in batches, again and again, as soon as new data gets added. Things get
+really complicated when a few such tasks need to get chained together, or when the same data set
+needs to be processed in various ways by different jobs, while all compete for the shared cluster
+resources.
+
+The opposite of this approach is stream processing: process the data as soon as it arrives, not
+in batches. Streaming systems such as Spark Streaming, Storm, Kafka Streams, and many others make
+this possible. But writing streaming services is not trivial. The streaming systems are becoming
+more and more capable and support more complex constructs, but they are not yet easy to use. All
+queries and processes need to be carefully planned and implemented.
+
+To summarize, batch processing is:
+
+
+  file-based
+  a paradigm that processes large chunks of data as a group
+  high latency and high throughput, both for ingest and query
+  typically easy to program, but hard to orchestrate
+  well suited for writing ad-hoc queries, although they are typically high latency
+
+
+While stream processing is:
+
+
+  a totally different paradigm, which involves single events and time windows instead of large groups of events
+  still file-based and not a long-term database
+  not batch-oriented, but incremental
+  ultra-fast ingest and ultra-fast query (query results basically pre-calculated)
+  not so easy to program, relatively easy to orchestrate
+  impossible to write ad-hoc queries
+
+
+And a Kudu-based near real-time approach is:
+
+
+  flexible and expressive, thanks to SQL support via Apache Impala (incubating)
+  a table-oriented, mutable data store that feels like a traditional relational database
+  very easy to program, you can even pretend it’s good old MySQL
+  low-latency and relatively high throughput, both for ingest and query
+
+
+At Argyle Data, we’re dealing with complex fraud detection scenarios. We need to ingest massive
+amounts of data, run machine learning algorithms and generate reports. When we created our current
+architecture two years ago we decided to opt for a database as the backbone of our system. That
+database is Apache Accumulo. It’s a key-value based database which runs on top of Hadoop HDFS,
+quite similar to HBase but with some important improvements such as cell level security and ease
+of deployment and management. To enable querying of this data for quite complex reporting and
+analytics, we used Presto, a distributed query engine with a pluggable architecture open-sourced
+by Facebook. We wrote a connector for it to let it run queries against the Accumulo database. This
+architecture has served us well, but there were a few problems:
+
+
+  we need to ingest even more massive volumes of data in real-time
+  we need to perform complex machine-learning calculations on even larger data-sets
+  we need to support ad-hoc queries, plus long-term data warehouse functionality
+
+
+So, we’ve started gradually moving the core machine-learning pipeline to a streaming-based
+solution. This way we can ingest and process larger data-sets faster and in real time. But then how
+would we take care of ad-hoc queries and long-term persistence? This is where Kudu comes in. While
+the machine learning pipeline ingests and processes real-time data, we store a copy of the same
+ingested data in Kudu for long-term access and ad-hoc queries. Kudu is our data warehouse. By
+using Kudu and Impala, we can retire our in-house Presto connector and rely on Impala’s
+super-fast query engine.
+
+But how would we make sure data is reliably ingested into the streaming pipeline and the
+Kudu-based data warehouse? This is where Apache Flume comes in.
+
+Why Flume
+According to its website, “Flume is a distributed, reliable, and
+available service for efficiently collecting, aggregating, and moving large amounts of log data.
+It has a simple and flexible architecture based on streaming data flows. It is robust and fault
+tolerant with tunable reliability mechanisms and many failover and recovery mechanisms.” As you
+can see, Hadoop is mentioned nowhere, yet Flume is typically used for ingesting data into Hadoop
+clusters.
+
+
+
+Flume has an extensible architecture. An instance of Flume, called an agent, can have multiple
+channels, with each having multiple sources and sinks of various types. Sources queue data
+in channels, which in turn write out data to sinks. Such pipelines can be chained together to
+create even more complex ones. There may be more than one agent and agents can be configured to
+support failover and recovery.
+
+Flume comes with a bunch of built-in types of channels, sources, and sinks. The memory channel is
+the default (an in-memory queue with no persistence to disk), but other options such as Kafka- and
+file-based channels are also provided. As for sources, Avro, JMS, Thrift, and spooling-directory
+sources are some of the built-in ones. Flume also ships with many sinks, including sinks for
+writing data to HDFS, HBase, Hive, and Kafka, as well as to other Flume agents.
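+
+As a quick illustration (not from the original post), trading the default memory channel for
+Flume’s durable file channel only requires changing the channel definition; the directory paths
+below are placeholders:
+
+```
+agent1.channels.channel1.type = file
+agent1.channels.channel1.checkpointDir = /flume/checkpoint
+agent1.channels.channel1.dataDirs = /flume/data
+```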
+
+In the rest of this post I’ll go over the Kudu Flume sink and show you how to configure Flume to
+write ingested data to a Kudu table. The sink has been part of the Kudu distribution since the 0.8
+release and the source code can be found here.
+
+Configuring the Kudu Flume Sink
+Here is a sample Flume configuration file:
+
+```
+agent1.sources  = source1
+agent1.channels = channel1
+agent1.sinks = sink1
+
+agent1.sources.source1.type = exec
+agent1.sources.source1.command = /usr/bin/vmstat 1
+agent1.sources.source1.channels = channel1
+
+agent1.channels.channel1.type = memory
+agent1.channels.channel1.capacity = 10000
+agent1.channels.channel1.transactionCapacity = 1000
+
+agent1.sinks.sink1.type = org.apache.flume.sink.kudu.KuduSink
+agent1.sinks.sink1.masterAddresses = localhost
+agent1.sinks.sink1.tableName = stats
+agent1.sinks.sink1.channel = channel1
+agent1.sinks.sink1.batchSize = 50
+agent1.sinks.sink1.producer = org.apache.kudu.flume.sink.SimpleKuduEventProducer
+```
+
+We define a source called source1 which simply executes a vmstat command to continuously generate
+virtual memory statistics for the machine and queues events into an in-memory channel called
+channel1, which in turn is used for writing these events to a Kudu table called stats. We are
+using org.apache.kudu.flume.sink.SimpleKuduEventProducer as the producer. SimpleKuduEventProducer
+is the built-in and default producer, but it’s implemented only as a showcase for how to write
+Flume events into Kudu tables. For any serious functionality we’d have to write a custom producer.
+We need to make this producer and the KuduSink class available to Flume. We can do that by simply
+copying the kudu-flume-sink-&amp;lt;VERSION&amp;gt;.jar file from the Kudu distribution to the
+$FLUME_HOME/plugins.d/kudu-sink/lib directory in the Flume installation. The jar file contains
+KuduSink and all of its dependencies (including the Kudu Java client classes).
+
+At a minimum, the Kudu Flume Sink needs to know where the Kudu masters are
+(agent1.sinks.sink1.masterAddresses = localhost) and which Kudu table Flume events should be
+written to (agent1.sinks.sink1.tableName = stats). The Kudu Flume Sink doesn’t create this
+table; it has to be created before the Kudu Flume Sink is started.
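+
+For instance, the stats table could be created up front with the Kudu Java client. The following
+is only a sketch, not code from this post: it assumes a minimal schema consisting of a single
+binary payload column that doubles as the primary key (matching what SimpleKuduEventProducer
+writes), and it uses the org.apache.kudu package names of recent clients (older releases used
+org.kududb):
+
+```
+import java.util.Arrays;
+
+import org.apache.kudu.ColumnSchema;
+import org.apache.kudu.Schema;
+import org.apache.kudu.Type;
+import org.apache.kudu.client.CreateTableOptions;
+import org.apache.kudu.client.KuduClient;
+
+public class CreateStatsTable {
+  public static void main(String[] args) throws Exception {
+    // Connect to the Kudu master configured for the sink (masterAddresses = localhost).
+    KuduClient client = new KuduClient.KuduClientBuilder(&quot;localhost&quot;).build();
+    try {
+      // Hypothetical schema: one binary column that is both the payload and the primary key.
+      ColumnSchema payloadCol =
+          new ColumnSchema.ColumnSchemaBuilder(&quot;payload&quot;, Type.BINARY).key(true).build();
+      Schema schema = new Schema(Arrays.asList(payloadCol));
+      CreateTableOptions options =
+          new CreateTableOptions().setRangePartitionColumns(Arrays.asList(&quot;payload&quot;));
+      client.createTable(&quot;stats&quot;, schema, options);
+    } finally {
+      client.shutdown();
+    }
+  }
+}
+```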
+
+You may also notice the batchSize parameter: the sink takes up to that many Flume events from the
+channel per transaction and flushes the entire batch to Kudu in one shot. Tuning batchSize
+properly can have a huge impact on the ingest performance of the Kudu cluster.
+
+Here is a complete list of KuduSink parameters:
+
+  Parameter Name (Default): Description
+
+  masterAddresses (N/A): Comma-separated list of “host:port” pairs of the masters (port optional)
+  tableName (N/A): The name of the table in Kudu to write to
+  producer (org.apache.kudu.flume.sink.SimpleKuduEventProducer): The fully qualified class name of the Kudu event producer the sink should use
+  batchSize (100): Maximum number of events the sink should take from the channel per transaction, if available
+  timeoutMillis (30000): Timeout period for Kudu operations, in milliseconds
+  ignoreDuplicateRows (true): Whether to ignore errors indicating that we attempted to insert duplicate rows into Kudu
+
+
+Let’s take a look at the source code for the built-in producer class:
+
+```
+public class SimpleKuduEventProducer implements KuduEventProducer {
+  private byte[] payload;
+  private KuduTable table;
+  private String payloadColumn;
+
+  public SimpleKuduEventProducer() {
+  }
+
+  @Override
+  public void configure(Context context) {
+    // The destination column is configurable and defaults to 'payload'.
+    payloadColumn = context.getString(&quot;payloadColumn&quot;, &quot;payload&quot;);
+  }
+
+  @Override
+  public void configure(ComponentConfiguration conf) {
+  }
+
+  @Override
+  public void initialize(Event event, KuduTable table) {
+    this.payload = event.getBody();
+    this.table = table;
+  }
+
+  @Override
+  public List&amp;lt;Operation&amp;gt; getOperations() throws FlumeException {
+    try {
+      // Wrap the raw event body in a single Kudu insert operation.
+      Insert insert = table.newInsert();
+      PartialRow row = insert.getRow();
+      row.addBinary(payloadColumn, payload);
+
+      return Collections.singletonList((Operation) insert);
+    } catch (Exception e) {
+      throw new FlumeException(&quot;Failed to create Kudu Insert object!&quot;, e);
+    }
+  }
+
+  @Override
+  public void close() {
+  }
+}
+```
+
+SimpleKuduEventProducer implements the org.apache.kudu.flume.sink.KuduEventProducer interface,
+which itself looks like this:
+
+```
+public interface KuduEventProducer extends Configurable, ConfigurableComponent {
+  /**
+   * Initialize the event producer.
+   * @param event to be written to Kudu
+   * @param table the KuduTable object used for creating Kudu Operation objects
+   */
+  void initialize(Event event, KuduTable table);
+
+  /**
+   * Get the operations that should be written out to Kudu as a result of this
+   * event. This list is written to Kudu using the Kudu client API.
+   * @return List of {@link org.kududb.client.Operation} which
+   * are written as such to Kudu
+   */
+  List&amp;lt;Operation&amp;gt; getOperations();
+
+  /**
+   * Clean up any state. This will be called when the sink is being stopped.
+   */
+  void close();
+}
+```
+
+public void configure(Context context) is called when an instance of our producer is instantiated
+by the KuduSink. SimpleKuduEventProducer’s implementation looks for a producer parameter named
+payloadColumn and uses its value (“payload” if not overridden in the Flume configuration file) as
+the column which will hold the value of the Flume event payload. If you recall from above, we
+configured the KuduSink to listen for events generated from the vmstat command. Each output row
+from that command will be stored as a new row containing a payload column in the stats table.
+Apart from payloadColumn, SimpleKuduEventProducer has no other configuration parameters, but if it
+did we would define them by prefixing their names with producer.
+(agent1.sinks.sink1.producer.parameter1, for example).
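+
+To make that concrete, overriding payloadColumn for our sample agent would look like the
+following (the column name used here is purely illustrative):
+
+```
+agent1.sinks.sink1.producer = org.apache.kudu.flume.sink.SimpleKuduEventProducer
+agent1.sinks.sink1.producer.payloadColumn = vmstat_output
+```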
+
+The main producer logic resides in the public List&amp;lt;Operation&amp;gt; getOperations() method. In
+SimpleKuduEventProducer’s implementation we simply insert the binary body of the Flume event into
+the Kudu table. Here we call Kudu’s newInsert() to initiate an insert, but we could have used an
+upsert instead if updating an existing row was also an option; in fact, there’s another producer
+implementation available for doing just that: SimpleKeyedKuduEventProducer. In the real world you
+will most probably need to write your own custom producer, but you can base your implementation
+on the built-in ones, as in the sketch below.
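+
+What follows is a minimal sketch of such a custom producer, not code shipped with Kudu: it assumes
+a hypothetical target table with a BIGINT ts key column and a STRING line column, and it uses the
+org.apache.kudu package names of recent clients (older releases used org.kududb):
+
+```
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.FlumeException;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.kudu.client.Insert;
+import org.apache.kudu.client.KuduTable;
+import org.apache.kudu.client.Operation;
+import org.apache.kudu.client.PartialRow;
+import org.apache.kudu.flume.sink.KuduEventProducer;
+
+// Illustrative only: stores each Flume event as a (ts, line) row in a hypothetical table.
+public class LineKuduEventProducer implements KuduEventProducer {
+  private Event event;
+  private KuduTable table;
+
+  @Override
+  public void configure(Context context) {
+  }
+
+  @Override
+  public void configure(ComponentConfiguration conf) {
+  }
+
+  @Override
+  public void initialize(Event event, KuduTable table) {
+    this.event = event;
+    this.table = table;
+  }
+
+  @Override
+  public List&amp;lt;Operation&amp;gt; getOperations() throws FlumeException {
+    try {
+      // table.newUpsert() could be used instead if overwriting existing rows is acceptable.
+      Insert insert = table.newInsert();
+      PartialRow row = insert.getRow();
+      // Hypothetical key: the arrival time. A real producer needs a collision-free key,
+      // for example a compound (host, ts, sequence) key.
+      row.addLong(&quot;ts&quot;, System.currentTimeMillis());
+      row.addString(&quot;line&quot;, new String(event.getBody(), StandardCharsets.UTF_8));
+      return Collections.singletonList((Operation) insert);
+    } catch (Exception e) {
+      throw new FlumeException(&quot;Failed to create Kudu Insert object!&quot;, e);
+    }
+  }
+
+  @Override
+  public void close() {
+  }
+}
+```
+
+Such a class would then be wired in through the agent1.sinks.sink1.producer property, exactly like
+the built-in producer above.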
+
+In the future, we plan to add more flexible event producer implementations so that creation of a
+custom event producer is not required to write data to Kudu. See
+here for a work-in-progress generic event producer for
+Avro-encoded Events.
+
+Conclusion
+Kudu is a scalable data store which lets us ingest insane amounts of data per second. Apache Flume
+helps us aggregate data from various sources, and the Kudu Flume Sink lets us easily store
+the aggregated Flume events into Kudu. Together they enable us to create a data warehouse out of
+disparate sources.
+
+Ara Abrahamian is a software engineer at Argyle Data building fraud detection systems using
+sophisticated machine learning methods. Ara is the original author of the Flume Kudu Sink that
+is included in the Kudu distribution. You can follow him on Twitter at @ara_e.</summary></entry><entry><title>New Range Partitioning Features in Kudu 0.10</title><link href="/2016/08/23/new-range-partitioning-features.html" rel="alternate" type="text/html" title="New Range Partitioning Features in Kudu 0.10" /><published>2016-08-23T00:00:00-07:00</published><updated>2016-08-23T00:00:00-07:00</updated><id>/2016/08/23/new-range-partitioning-features</id><content type="html" xml:base="/2016/08/23/new-range-partitioning-features.html">&lt;p&gt;Kudu 0.10 is shipping with a few important new features for range partitioning.
 These features are designed to make Kudu easier to scale for certain workloads,
 like time series. This post will introduce these features, and discuss how to use
 them to effectively design tables for scalability and performance.&lt;/p&gt;
@@ -563,106 +1147,4 @@
 
 
   Read the detailed Kudu 0.9.1 release notes
-  Download the Kudu 0.9.1 source release</summary></entry><entry><title>Apache Kudu (incubating) Weekly Update June 27, 2016</title><link href="/2016/06/27/weekly-update.html" rel="alternate" type="text/html" title="Apache Kudu (incubating) Weekly Update June 27, 2016" /><published>2016-06-27T00:00:00-07:00</published><updated>2016-06-27T00:00:00-07:00</updated><id>/2016/06/27/weekly-update</id><content type="html" xml:base="/2016/06/27/weekly-update.html">&lt;p&gt;Welcome to the fifteenth edition of the Kudu Weekly Update. This weekly blog post
-covers ongoing development and news in the Apache Kudu (incubating) project.&lt;/p&gt;
-
-&lt;!--more--&gt;
-
-&lt;h2 id=&quot;development-discussions-and-code-in-progress&quot;&gt;Development discussions and code in progress&lt;/h2&gt;
-
-&lt;ul&gt;
-  &lt;li&gt;
-    &lt;p&gt;Todd Lipcon diagnosed and fixed a &lt;a href=&quot;https://gerrit.cloudera.org/3445&quot;&gt;tricky bug&lt;/a&gt;
-which could cause Kudu servers to crash under load. It turned out that the bug
-was in a synchronization profiling code path related to the tcmalloc allocator.
-This allocator is used in release builds, but can’t be used in instrumented builds
-such as
-&lt;a href=&quot;http://clang.llvm.org/docs/AddressSanitizer.html&quot;&gt;AddressSanitizer&lt;/a&gt; or
-&lt;a href=&quot;http://clang.llvm.org/docs/ThreadSanitizer.html&quot;&gt;ThreadSanitizer&lt;/a&gt;. This made it particularly difficult
-to catch. The bug fix will be released in the upcoming 0.9.1 release.&lt;/p&gt;
-  &lt;/li&gt;
-  &lt;li&gt;
-    &lt;p&gt;Todd also finished and committed a fix for &lt;a href=&quot;https://issues.apache.org/jira/browse/KUDU-1469&quot;&gt;KUDU-1469&lt;/a&gt;,
-a bug in which Kudu’s implementation of Raft consensus could get “stuck” not making
-progress replicating operations for a tablet. See the
-&lt;a href=&quot;https://gerrit.cloudera.org/#/c/3228/7/src/kudu/integration-tests/raft_consensus-itest.cc&quot;&gt;new integration test case&lt;/a&gt;
-for more details on this bug.&lt;/p&gt;
-  &lt;/li&gt;
-  &lt;li&gt;
-    &lt;p&gt;Mike Percy finished implementing and committed a feature which allows
-&lt;a href=&quot;https://gerrit.cloudera.org/#/c/3135/&quot;&gt;reserving disk space for non-Kudu processes&lt;/a&gt;.
-This feature causes Kudu to stop allocating new data blocks on a
-disk if it is within a user-specified threshold of being full, preventing
-possible crashes and allowing for safer collocation of Kudu with other processes
-on a cluster.&lt;/p&gt;
-  &lt;/li&gt;
-  &lt;li&gt;
-    &lt;p&gt;Will Berkeley finished implementing &lt;a href=&quot;https://issues.apache.org/jira/browse/KUDU-1398&quot;&gt;KUDU-1398&lt;/a&gt;,
-a new optimization which reduces the amount of disk space used by
-indexing structures in Kudu’s internal storage format. This should
-improve storage efficiency for workloads with large keys, and can
-also improve write performance by increasing the number of index
-entries which can fit in a given amount of cache memory.&lt;/p&gt;
-  &lt;/li&gt;
-  &lt;li&gt;
-    &lt;p&gt;David Alves has completed posting a patch series that implements
-exactly-once RPC semantics. The design, as mentioned in previous
-blog posts, is described in a &lt;a href=&quot;https://gerrit.cloudera.org/#/c/2642/&quot;&gt;design document&lt;/a&gt;
-and the patches can be found in a 10-patch series starting with
-&lt;a href=&quot;https://gerrit.cloudera.org/#/c/3190/&quot;&gt;gerrit #3190&lt;/a&gt;.&lt;/p&gt;
-  &lt;/li&gt;
-  &lt;li&gt;
-    &lt;p&gt;Dan Burkert is continuing working on adding support for
-&lt;a href=&quot;https://github.com/apache/incubator-kudu/blob/master/docs/design-docs/non-covering-range-partitions.md&quot;&gt;tables with range partitions that don’t cover the entire key
-space&lt;/a&gt;.
-This past week, he focused on adding &lt;a href=&quot;https://gerrit.cloudera.org/#/c/3388/&quot;&gt;support in the the Java client&lt;/a&gt;
-which also necessitated some serious &lt;a href=&quot;https://gerrit.cloudera.org/#/c/3477/&quot;&gt;refactoring&lt;/a&gt;. These patches
-are now under review.&lt;/p&gt;
-  &lt;/li&gt;
-  &lt;li&gt;
-    &lt;p&gt;Congratulations to Andrew Wong, a new contributor who committed his
-first patches this week. Andrew &lt;a href=&quot;https://gerrit.cloudera.org/#/c/3424/&quot;&gt;improved the build docs for OSX&lt;/a&gt;
-and also fixed a &lt;a href=&quot;https://gerrit.cloudera.org/#/c/3486/&quot;&gt;crash if the user forgot to specify the master address
-in some command line tools&lt;/a&gt;.
-Thanks, Andrew!&lt;/p&gt;
-  &lt;/li&gt;
-&lt;/ul&gt;
-
-&lt;h2 id=&quot;project-news&quot;&gt;Project news&lt;/h2&gt;
-
-&lt;ul&gt;
-  &lt;li&gt;
-    &lt;p&gt;The Apache Kudu web site has finished migrating to Apache Software Foundation infrastructure.
-The site can now be found at &lt;a href=&quot;http://kudu.incubator.apache.org/&quot;&gt;kudu.incubator.apache.org&lt;/a&gt;.
-Existing links will automatically redirect.&lt;/p&gt;
-  &lt;/li&gt;
-  &lt;li&gt;
-    &lt;p&gt;A Kudu 0.9.1 release candidate was posted and passed a
-&lt;a href=&quot;http://mail-archives.apache.org/mod_mbox/incubator-kudu-dev/201606.mbox/%3CCADY20s6%3D%2BnKNgvx%3DG_pKupQGiH%2B9ToS53LqExBwWM6vLp-ns9A%40mail.gmail.com%3E&quot;&gt;release vote&lt;/a&gt;
-by the Kudu Podling PMC (PPMC).
-The release candidate will now be voted upon by the Apache Incubator PMC. If all goes well, we
-can expect a release late this week. The release fixes a few critical bugs discovered in 0.9.0.&lt;/p&gt;
-  &lt;/li&gt;
-  &lt;li&gt;
-    &lt;p&gt;Chris Mattmann, one of Kudu’s mentors from the Apache Incubator,
-started a &lt;a href=&quot;http://mail-archives.apache.org/mod_mbox/incubator-kudu-dev/201606.mbox/%3CAD4A858D-403D-4E74-A4F4-DE2F08FB761E%40jpl.nasa.gov%3E&quot;&gt;discussion&lt;/a&gt;
-about the project’s graduation to a top-level project (TLP).
-Initial responses seem to be positive, so the next step will
-be to work on a draft resolution and various stages of
-voting.&lt;/p&gt;
-  &lt;/li&gt;
-&lt;/ul&gt;
-
-&lt;h2 id=&quot;on-the-kudu-blog&quot;&gt;On the Kudu blog&lt;/h2&gt;
-
-&lt;ul&gt;
-  &lt;li&gt;Adar Dembo published a post detailing his recent work on
-&lt;a href=&quot;http://kudu.apache.org/2016/06/24/multi-master-1-0-0.html&quot;&gt;master fault tolerance in Kudu 1.0&lt;/a&gt;.&lt;/li&gt;
-&lt;/ul&gt;
-
-&lt;p&gt;Want to learn more about a specific topic from this blog post? Shoot an email to the
-&lt;a href=&quot;&amp;#109;&amp;#097;&amp;#105;&amp;#108;&amp;#116;&amp;#111;:&amp;#117;&amp;#115;&amp;#101;&amp;#114;&amp;#064;&amp;#107;&amp;#117;&amp;#100;&amp;#117;&amp;#046;&amp;#105;&amp;#110;&amp;#099;&amp;#117;&amp;#098;&amp;#097;&amp;#116;&amp;#111;&amp;#114;&amp;#046;&amp;#097;&amp;#112;&amp;#097;&amp;#099;&amp;#104;&amp;#101;&amp;#046;&amp;#111;&amp;#114;&amp;#103;&quot;&gt;kudu-user mailing list&lt;/a&gt; or
-tweet at &lt;a href=&quot;https://twitter.com/ApacheKudu&quot;&gt;@ApacheKudu&lt;/a&gt;. Similarly, if you’re
-aware of some Kudu news we missed, let us know so we can cover it in
-a future post.&lt;/p&gt;</content><author><name>Todd Lipcon</name></author><summary>Welcome to the fifteenth edition of the Kudu Weekly Update. This weekly blog post
-covers ongoing development and news in the Apache Kudu (incubating) project.</summary></entry></feed>
+  Download the Kudu 0.9.1 source release</summary></entry></feed>