Improve SIMD code generation for primitive predicates

This adds a local (on-stack) copy of the bounds for range and equality
predicates before evaluating them against the columns. These on-stack
copies help the compiler realize that the stores to the selection vector
can't overwrite the predicate itself, and thus allow SIMD code
generation.

Benchmarked with column_predicate-test. Only the 'NOT NULL' results are
shown below (since this change doesn't affect the evaluation of nulls):

Before:
 int8   NOT NULL   (c >= 0 AND c < 2) 1363.5M evals/sec	2.09 cycles/eval
 int16  NOT NULL   (c >= 0 AND c < 2) 1238.3M evals/sec	2.30 cycles/eval
 int32  NOT NULL   (c >= 0 AND c < 2) 1321.3M evals/sec	2.15 cycles/eval
 int64  NOT NULL   (c >= 0 AND c < 2) 1408.3M evals/sec	2.02 cycles/eval
 float  NOT NULL   (c >= 0 AND c < 2) 1134.8M evals/sec	2.52 cycles/eval
 double NOT NULL   (c >= 0 AND c < 2) 1144.2M evals/sec	2.49 cycles/eval

After:
 int8   NOT NULL   (c >= 0 AND c < 2) 3152.2M evals/sec	0.88 cycles/eval
 int16  NOT NULL   (c >= 0 AND c < 2) 3309.6M evals/sec	0.85 cycles/eval
 int32  NOT NULL   (c >= 0 AND c < 2) 3384.0M evals/sec	0.85 cycles/eval
 int64  NOT NULL   (c >= 0 AND c < 2) 1847.6M evals/sec	1.57 cycles/eval
 float  NOT NULL   (c >= 0 AND c < 2) 3268.3M evals/sec	0.88 cycles/eval
 double NOT NULL   (c >= 0 AND c < 2) 2245.2M evals/sec	1.27 cycles/eval

The numbers for non-range predicates didn't seem to change here.

Change-Id: I1772584c1d0c53128608ea26248dd4ab069b8108
Reviewed-on: http://gerrit.cloudera.org:8080/14855
Reviewed-by: Adar Dembo <adar@cloudera.com>
Tested-by: Kudu Jenkins
diff --git a/src/kudu/common/column_predicate.cc b/src/kudu/common/column_predicate.cc
index bea1142..2c84e85 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -673,7 +673,7 @@
   const cpp_type* data = reinterpret_cast<const cpp_type*>(block.data());
   const int n_chunks = block.nrows() / 8;
   for (int i = 0; i < n_chunks; i++) {
-    uint8_t res_8 = 0;;
+    uint8_t res_8 = 0;
     for (int j = 0; j < 8; j++) {
       res_8 |= p(data++) << j;
     }
@@ -733,27 +733,34 @@
 template <DataType PhysicalType>
 void ColumnPredicate::EvaluateForPhysicalType(const ColumnBlock& block,
                                               SelectionVector* sel) const {
+  using traits = DataTypeTraits<PhysicalType>;
+  using cpp_type = typename traits::cpp_type;
+
   switch (predicate_type()) {
     case PredicateType::Range: {
+      cpp_type local_lower = lower_ ? *static_cast<const cpp_type*>(lower_) : cpp_type();
+      cpp_type local_upper = upper_ ? *static_cast<const cpp_type*>(upper_) : cpp_type();
+
       if (lower_ == nullptr) {
-        ApplyPredicate<PhysicalType>(block, sel, [this] (const void* cell) {
-          return DataTypeTraits<PhysicalType>::Compare(cell, this->upper_) < 0;
+        ApplyPredicate<PhysicalType>(block, sel, [local_upper] (const void* cell) {
+            return traits::Compare(cell, &local_upper) < 0;
         });
       } else if (upper_ == nullptr) {
-        ApplyPredicate<PhysicalType>(block, sel, [this] (const void* cell) {
-          return DataTypeTraits<PhysicalType>::Compare(cell, this->lower_) >= 0;
+        ApplyPredicate<PhysicalType>(block, sel, [local_lower] (const void* cell) {
+            return traits::Compare(cell, &local_lower) >= 0;
         });
       } else {
-        ApplyPredicate<PhysicalType>(block, sel, [this] (const void* cell) {
-          return DataTypeTraits<PhysicalType>::Compare(cell, this->upper_) < 0 &&
-                 DataTypeTraits<PhysicalType>::Compare(cell, this->lower_) >= 0;
+        ApplyPredicate<PhysicalType>(block, sel, [local_lower, local_upper] (const void* cell) {
+            return traits::Compare(cell, &local_upper) < 0 &&
+                   traits::Compare(cell, &local_lower) >= 0;
         });
       }
       return;
     };
     case PredicateType::Equality: {
-      ApplyPredicate<PhysicalType>(block, sel, [this] (const void* cell) {
-        return DataTypeTraits<PhysicalType>::Compare(cell, this->lower_) == 0;
+      cpp_type local_lower = lower_ ? *static_cast<const cpp_type*>(lower_) : cpp_type();
+      ApplyPredicate<PhysicalType>(block, sel, [local_lower] (const void* cell) {
+            return traits::Compare(cell, &local_lower) == 0;
       });
       return;
     };
@@ -774,7 +781,7 @@
       ApplyPredicate<PhysicalType>(block, sel, [this] (const void* cell) {
         return std::binary_search(values_.begin(), values_.end(), cell,
                                   [] (const void* lhs, const void* rhs) {
-                                    return DataTypeTraits<PhysicalType>::Compare(lhs, rhs) < 0;
+                                    return traits::Compare(lhs, rhs) < 0;
                                   });
       });
       return;
diff --git a/src/kudu/common/column_predicate.h b/src/kudu/common/column_predicate.h
index 2527fe1..963a172 100644
--- a/src/kudu/common/column_predicate.h
+++ b/src/kudu/common/column_predicate.h
@@ -22,6 +22,7 @@
 #include <cstdint>
 #include <ostream>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include <boost/optional/optional.hpp>