Merge pull request #53 from DataSketches/JaccardSimilarityUDF

Jaccard similarity UDF
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/DataToDoublesSketch.java b/src/main/java/com/yahoo/sketches/pig/quantiles/DataToDoublesSketch.java
index 84ca5d9..d1bc8df 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/DataToDoublesSketch.java
+++ b/src/main/java/com/yahoo/sketches/pig/quantiles/DataToDoublesSketch.java
@@ -124,12 +124,12 @@
   @Override // TOP LEVEL EXEC
   public Tuple exec(final Tuple inputTuple) throws IOException {
     //The exec is a stateless function. It operates on the input and returns a result.
-    if (inputTuple != null && inputTuple.size() > 0) {
+    if ((inputTuple != null) && (inputTuple.size() > 0)) {
       final DoublesUnion union = unionBuilder_.build();
       final DataBag bag = (DataBag) inputTuple.get(0);
       for (final Tuple innerTuple: bag) {
-        Object value = innerTuple.get(0);
-        if(value != null) {
+        final Object value = innerTuple.get(0);
+        if (value != null) {
           union.update((Double) value);
         }
       }
@@ -172,15 +172,15 @@
    */
   @Override
   public void accumulate(final Tuple inputTuple) throws IOException {
-    if (inputTuple == null || inputTuple.size() == 0) { return; }
+    if ((inputTuple == null) || (inputTuple.size() == 0)) { return; }
     final DataBag bag = (DataBag) inputTuple.get(0);
     if (bag == null) { return; }
     if (accumUnion_ == null) {
       accumUnion_ = unionBuilder_.build();
     }
     for (final Tuple innerTuple: bag) {
-      Object value = innerTuple.get(0);
-        if(value != null) {
+      final Object value = innerTuple.get(0);
+        if (value != null) {
           accumUnion_.update((Double) value);
       }
     }
@@ -309,7 +309,7 @@
 
     @Override // IntermediateFinal exec
     public Tuple exec(final Tuple inputTuple) throws IOException { //throws is in API
-      if (inputTuple != null && inputTuple.size() > 0) {
+      if ((inputTuple != null) && (inputTuple.size() > 0)) {
         final DoublesUnion union = unionBuilder_.build();
         final DataBag outerBag = (DataBag) inputTuple.get(0);
         for (final Tuple dataTuple: outerBag) {
@@ -323,8 +323,8 @@
             // It is due to system bagged outputs from multiple mapper Initial functions.
             // The Intermediate stage was bypassed.
             for (final Tuple innerTuple: innerBag) {
-              Object value = innerTuple.get(0);
-              if(value != null) {
+              final Object value = innerTuple.get(0);
+              if (value != null) {
                 union.update((Double) value);
               }
             }
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/DataToItemsSketch.java b/src/main/java/com/yahoo/sketches/pig/quantiles/DataToItemsSketch.java
index c1d28bb..fcccfb6 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/DataToItemsSketch.java
+++ b/src/main/java/com/yahoo/sketches/pig/quantiles/DataToItemsSketch.java
@@ -116,14 +116,14 @@
   @Override // TOP LEVEL EXEC
   public Tuple exec(final Tuple inputTuple) throws IOException {
     //The exec is a stateless function. It operates on the input and returns a result.
-    if (inputTuple != null && inputTuple.size() > 0) {
+    if ((inputTuple != null) && (inputTuple.size() > 0)) {
       final ItemsUnion<T> union = k_ > 0
           ? ItemsUnion.getInstance(k_, comparator_)
           : ItemsUnion.getInstance(comparator_);
       final DataBag bag = (DataBag) inputTuple.get(0);
       for (final Tuple innerTuple: bag) {
-        Object value = innerTuple.get(0);
-        if(value != null) {
+        final Object value = innerTuple.get(0);
+        if (value != null) {
           union.update(extractValue(value));
         }
       }
@@ -167,7 +167,7 @@
    */
   @Override
   public void accumulate(final Tuple inputTuple) throws IOException {
-    if (inputTuple == null || inputTuple.size() == 0) { return; }
+    if ((inputTuple == null) || (inputTuple.size() == 0)) { return; }
     final DataBag bag = (DataBag) inputTuple.get(0);
     if (bag == null) { return; }
     if (accumUnion_ == null) {
@@ -176,8 +176,8 @@
         : ItemsUnion.getInstance(comparator_);
     }
     for (final Tuple innerTuple: bag) {
-      Object value = innerTuple.get(0);
-      if(value != null) {
+      final Object value = innerTuple.get(0);
+      if (value != null) {
         accumUnion_.update(extractValue(value));
       }
     }
@@ -297,7 +297,7 @@
 
     @Override // IntermediateFinal exec
     public Tuple exec(final Tuple inputTuple) throws IOException { //throws is in API
-      if (inputTuple != null && inputTuple.size() > 0) {
+      if ((inputTuple != null) && (inputTuple.size() > 0)) {
         final ItemsUnion<T> union = k_ > 0
             ? ItemsUnion.getInstance(k_, comparator_)
             : ItemsUnion.getInstance(comparator_);
@@ -313,8 +313,8 @@
             // It is due to system bagged outputs from multiple mapper Initial functions.
             // The Intermediate stage was bypassed.
             for (final Tuple innerTuple: innerBag) {
-              Object value = innerTuple.get(0);
-              if(value != null) {
+              final Object value = innerTuple.get(0);
+              if (value != null) {
                 union.update(extractValue(value));
               }
             }
diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml
index bfdf482..0a67506 100644
--- a/tools/SketchesCheckstyle.xml
+++ b/tools/SketchesCheckstyle.xml
@@ -4,34 +4,21 @@
       "http://www.puppycrawl.com/dtds/configuration_1_3.dtd"> <!-- does not work with https -->
 
 <!--
-  This configuration file was written by the eclipse-cs plugin configuration editor
-  Checkstyle-Configuration: SketchesCheckstyle
+  SketchesCheckstyle.xml for sketches-core
 
   Checkstyle is very configurable. Be sure to read the documentation at
   http://checkstyle.sourceforge.net (or in your downloaded distribution). Note: Does not work with https.
 
-  Most Checks are configurable, be sure to consult the documentation.
-
   To completely disable a check, just comment it out or delete it from the file.
 
   Authors: Max Vetrenko, Ruslan Diachenko, Roman Ivanov.
-
 -->
 
 <module name = "Checker">
   <property name="charset" value="UTF-8"/>
-
   <property name="severity" value="warning"/>
-
   <property name="fileExtensions" value="java, properties, xml"/>
-  
-  <!-- Enable suppression using comments: //CHECKSTYLE.OFF RULE and //CHECKSTYLE.ON RULE-->
-  <module name="SuppressionCommentFilter">
-    <property name="offCommentFormat" value="CHECKSTYLE.OFF\: ([\w\|]+)"/>
-    <property name="onCommentFormat" value="CHECKSTYLE.ON\: ([\w\|]+)"/>
-    <property name="checkFormat" value="$1"/>
-  </module>
-  
+
   <module name="FileTabCharacter">
     <property name="eachLine" value="true"/>
   </module>
@@ -53,7 +40,7 @@
       <property name="tokens" value="VARIABLE_DEF"/>
       <property name="allowSamelineMultipleAnnotations" value="true"/>
     </module>
-  
+
     <!-- Block Checks -->
     <module name="EmptyBlock">
       <property name="severity" value="warning"/>
@@ -71,7 +58,6 @@
     <module name="LeftCurly"> 
       <!-- doesn't allow for if (n == 0) { return 0.0; }, which is readable and not corruptable -->
       <property name="severity" value="ignore"/>
-      <property name="maxLineLength" value="100"/>
       <metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/>
     </module>
     
@@ -144,7 +130,6 @@
     
     <module name="FinalClass"/>
     
-    
     <!-- Imports -->
     <module name="AvoidStarImport"/>
     
@@ -160,6 +145,15 @@
     
     <module name="UnusedImports"/>
     
+    <!-- Filters -->
+    <!-- Enable suppression using comments: //CHECKSTYLE.OFF: RULE and //CHECKSTYLE.ON: RULE
+     The colon is required and you must name the specific rule, as in: //CHECKSTYLE.OFF: LineLength -->
+    <module name="SuppressionCommentFilter">
+      <property name="offCommentFormat" value="CHECKSTYLE.OFF\: ([\w\|]+)"/>
+      <property name="onCommentFormat" value="CHECKSTYLE.ON\: ([\w\|]+)"/>
+      <property name="checkFormat" value="$1"/>
+    </module>
+    
     <!-- Javadoc Comments -->
     <!-- JavadocPackage under Checker -->
     <module name="AtclauseOrder">
@@ -215,8 +209,6 @@
       <!-- <metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/> -->
     </module>
     
-    <module name="FileContentsHolder"/> <!-- Used with SuppressionCommentFilter -->
-    
     <module name="Indentation">
       <property name="severity" value="ignore"/>
       <property name="basicOffset" value="2"/>