Hacked up code to test run ratios of SSB sequences.

commit: 420afef4583d4dcdc6cd00ecbc6809ec3c1b8ac7 [log] [tgz]
author: marc <marc.spehlmann@gmail.com> Fri Apr 08 10:20:05 2016 -0500
committer: marc <marc.spehlmann@gmail.com> Mon Apr 11 15:32:10 2016 -0500
tree: 384c7b09bd52d3e5aaecbf16dc0ea0fd42703eb9
parent: eb429da4e3a4a7939d3c8c154281f4ca6589671c [diff]
diff --git a/storage/PackedRowStoreTupleStorageSubBlock.cpp b/storage/PackedRowStoreTupleStorageSubBlock.cpp
index 4cb6d02..bf086da 100644
--- a/storage/PackedRowStoreTupleStorageSubBlock.cpp
+++ b/storage/PackedRowStoreTupleStorageSubBlock.cpp

@@ -143,6 +143,8 @@
                       + header_->num_tuples * relation_.getFixedByteLength();
   const unsigned num_nullable_attrs = relation_.numNullableAttributes();
 
+  printf("(rr:%f)\n", accessor->getTupleIdSequenceVirtual()->getInternalBitVector().runRatio());
+
   InvokeOnAnyValueAccessor(
       accessor,
       [&](auto *accessor) -> void {  // NOLINT(build/c++11)
@@ -214,6 +216,8 @@
                       + header_->num_tuples * relation_.getFixedByteLength();
   const unsigned num_nullable_attrs = relation_.numNullableAttributes();
 
+  printf("(rr:%f)\n", accessor->getTupleIdSequenceVirtual()->getInternalBitVector().runRatio());
+
   InvokeOnAnyValueAccessor(
       accessor,
       [&](auto *accessor) -> void {  // NOLINT(build/c++11)

diff --git a/storage/SplitRowStoreTupleStorageSubBlock.cpp b/storage/SplitRowStoreTupleStorageSubBlock.cpp
index 6c70d0f..f3af282 100644
--- a/storage/SplitRowStoreTupleStorageSubBlock.cpp
+++ b/storage/SplitRowStoreTupleStorageSubBlock.cpp

@@ -195,6 +195,8 @@
   const tuple_id original_num_tuples = header_->num_tuples;
   tuple_id pos = 0;
 
+  printf("(rr:%f)\n", accessor->getTupleIdSequenceVirtual()->getInternalBitVector().runRatio());
+
   InvokeOnAnyValueAccessor(
       accessor,
       [&](auto *accessor) -> void {  // NOLINT(build/c++11)
@@ -391,6 +393,8 @@
   const tuple_id original_num_tuples = header_->num_tuples;
   tuple_id pos = 0;
 
+  printf("(rr:%f)\n", accessor->getTupleIdSequenceVirtual()->getInternalBitVector().runRatio());
+
   InvokeOnAnyValueAccessor(
       accessor,
       [&](auto *accessor) -> void {  // NOLINT(build/c++11)

diff --git a/utility/BitVector.hpp b/utility/BitVector.hpp
index a6197a5..0897ea6 100644
--- a/utility/BitVector.hpp
+++ b/utility/BitVector.hpp

@@ -749,6 +749,62 @@
     return num_bits_;
   }
 
+  /**
+   * @brief Hack to determine the ratio of runs to set bits in this BitVector.
+   *
+   * A run is a maximal sequence of 1 or more consecutive set bits (selected
+   * tuples). For example, 100 set bits that are all adjacent form a single
+   * run, giving a ratio of 1/100 = .01, while an alternating pattern like
+   * 10101010101010 has one run per set bit, giving a ratio of 1.
+   *
+   * @note This examines every position individually with getBit(), making
+   *       size() calls in total. It is a measurement aid, not a fast path.
+   *
+   * @return The number of runs divided by the number of set bits, or 0 if
+   *         no bits are set (rather than the NaN produced by 0 / 0).
+   **/
+  const double runRatio() const {
+    // Count with integers so that the totals stay exact for any size();
+    // the conversion to double happens once, at the final division.
+    std::size_t ones_count = 0;
+    std::size_t runs_count = 0;
+
+    // True while the previously examined bit was set, i.e. while the scan
+    // is inside a run that has not been counted yet.
+    bool in_run = false;
+
+    const std::size_t num_bits = size();
+    for (std::size_t position = 0; position < num_bits; ++position) {
+      if (getBit(position)) {
+        ++ones_count;
+        in_run = true;
+      } else if (in_run) {
+        // A 0-bit directly after a 1-bit closes the current run.
+        in_run = false;
+        ++runs_count;
+      }
+    }
+
+    // A run that extends to the last bit is never followed by a 0-bit, so
+    // it has to be counted here.
+    if (in_run) {
+      ++runs_count;
+    }
+
+    // With no set bits there are no runs either. Dividing would evaluate
+    // 0 / 0, which is NaN and would propagate into whatever consumes the
+    // ratio, so report an explicit 0 instead.
+    if (ones_count == 0) {
+      return 0.0;
+    }
+
+    // Positions are visited in logical order through getBit(), so a run is
+    // counted once no matter how the bits are laid out in storage.
+    const double runs = static_cast<double>(runs_count);
+    const double ones = static_cast<double>(ones_count);
+    return runs / ones;
+  }
+
   /**
    * @brief Find the last 0-bit (strictly before the specified position) in
    *        this BitVector.

diff --git a/utility/tests/BitVector_unittest.cpp b/utility/tests/BitVector_unittest.cpp
index 52b475a..e6601f0 100644
--- a/utility/tests/BitVector_unittest.cpp
+++ b/utility/tests/BitVector_unittest.cpp

@@ -576,6 +576,36 @@
   TestFixture::runUnionWithTest(200);
 }
 
+TYPED_TEST(BitVectorTest, RunRatioTest) {
+  std::unique_ptr<TypeParam> bit_vector(this->createBitVector(TestFixture::kBiggerBitSize));
+  bit_vector->clear();
+
+  // Alternate bits ("0101..."): each set bit is its own run, so the ratio is 1.
+  for (size_t i = 0; i < TestFixture::kBiggerBitSize; ++i) {
+    bit_vector->setBit(i, i%2);
+  }
+
+  EXPECT_NEAR(1.0, bit_vector->runRatio(), 0.0001);
+
+  printf("run ratio: %f\n", bit_vector->runRatio());
+
+  bit_vector->clear();
+  // Repeat "011": each complete run holds two set bits, so the ratio should be about 0.5.
+  for (size_t i = 0; i < TestFixture::kBiggerBitSize; ++i) {
+    bit_vector->setBit(i, i%3 != 0);
+  }
+
+  EXPECT_NEAR(0.5, bit_vector->runRatio(), 0.0001);
+  printf("run ratio: %f\n", bit_vector->runRatio());
+
+  // Word-edge case: one 8-bit run starting 4 bits before bit sizeof(std::size_t) * 8, so RR == 1/8.
+  bit_vector->clear();
+  bit_vector->setBitRange((sizeof(std::size_t) * 8) - 4, 8, true);
+
+  EXPECT_NEAR(0.125, bit_vector->runRatio(), 0.0001);
+  printf("run ratio: %f\n", bit_vector->runRatio());
+}
+
 TYPED_TEST(BitVectorTest, SetBitRangeTest) {
   std::unique_ptr<TypeParam> small_bit_vector(this->createBitVector(TestFixture::kSmallBitSize));
   small_bit_vector->clear();
commit	420afef4583d4dcdc6cd00ecbc6809ec3c1b8ac7	[log] [tgz]
author	marc <marc.spehlmann@gmail.com>	Fri Apr 08 10:20:05 2016 -0500
committer	marc <marc.spehlmann@gmail.com>	Mon Apr 11 15:32:10 2016 -0500
tree	384c7b09bd52d3e5aaecbf16dc0ea0fd42703eb9
parent	eb429da4e3a4a7939d3c8c154281f4ca6589671c [diff]